1 #include "bddc.h" 2 #include "bddcprivate.h" 3 #include <petscblaslapack.h> 4 5 #undef __FUNCT__ 6 #define __FUNCT__ "PCBDDCSetUpSolvers" 7 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 8 { 9 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 10 PetscScalar *coarse_submat_vals; 11 PetscErrorCode ierr; 12 13 PetscFunctionBegin; 14 /* Compute matrix after change of basis and extract local submatrices */ 15 ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr); 16 17 /* Setup local scatters R_to_B and (optionally) R_to_D */ 18 /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */ 19 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 20 21 /* Setup local solvers ksp_D and ksp_R */ 22 /* PCBDDCSetUpLocalScatters should be called first! */ 23 ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr); 24 25 /* Change global null space passed in by the user if change of basis has been requested */ 26 if (pcbddc->NullSpace && pcbddc->use_change_of_basis) { 27 ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr); 28 } 29 30 /* 31 Setup local correction and local part of coarse basis. 32 Gives back the dense local part of the coarse matrix in column major ordering 33 */ 34 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 35 36 /* Compute total number of coarse nodes and setup coarse solver */ 37 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 38 39 /* free */ 40 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "PCBDDCResetCustomization" 46 PetscErrorCode PCBDDCResetCustomization(PC pc) 47 { 48 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 49 PetscInt i; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 54 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 55 ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr); 56 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 57 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 58 for (i=0;i<pcbddc->n_ISForDofs;i++) { 59 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 60 } 61 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 62 pcbddc->n_ISForDofs = 0; 63 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 64 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "PCBDDCResetTopography" 70 PetscErrorCode PCBDDCResetTopography(PC pc) 71 { 72 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 73 PetscErrorCode ierr; 74 75 PetscFunctionBegin; 76 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 77 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 78 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "PCBDDCResetSolvers" 84 PetscErrorCode PCBDDCResetSolvers(PC pc) 85 { 86 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 87 PetscErrorCode ierr; 88 89 PetscFunctionBegin; 90 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 91 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 92 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 93 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 94 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 95 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 96 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 97 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 98 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 99 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 100 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 101 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 102 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 103 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 104 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 105 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 106 ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr); 107 ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr); 108 ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr); 109 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 110 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 111 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 112 PetscFunctionReturn(0); 113 } 114 115 #undef __FUNCT__ 116 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors" 117 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 118 { 119 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 120 PC_IS *pcis = (PC_IS*)pc->data; 121 VecType impVecType; 122 PetscInt n_vertices,n_constraints,local_primal_size,n_R,old_size; 123 PetscErrorCode ierr; 124 125 PetscFunctionBegin; 126 if (!pcbddc->ConstraintMatrix) { 127 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 128 } 129 /* get sizes */ 130 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,NULL);CHKERRQ(ierr); 131 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr); 132 n_constraints = local_primal_size - n_vertices; 133 n_R = pcis->n-n_vertices; 134 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 135 /* local work vectors (try to avoid unneeded work)*/ 136 /* R nodes */ 137 old_size = -1; 138 if (pcbddc->vec1_R) { 139 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 140 } 141 if (n_R != old_size) { 142 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 143 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 144 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 145 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 146 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 147 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 148 } 149 /* local primal dofs */ 150 old_size = -1; 151 if (pcbddc->vec1_P) { 152 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 153 } 154 if (local_primal_size != old_size) { 155 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 156 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 157 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr); 158 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 159 } 160 /* local explicit constraints */ 161 old_size = -1; 162 if (pcbddc->vec1_C) { 163 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 164 } 165 if (n_constraints && n_constraints != old_size) { 166 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 167 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 168 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 169 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "PCBDDCSetUpCorrection" 176 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 177 { 178 PetscErrorCode ierr; 179 /* pointers to pcis and pcbddc */ 180 PC_IS* pcis = (PC_IS*)pc->data; 181 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 182 /* submatrices of local problem */ 183 Mat A_RV,A_VR,A_VV; 184 /* working matrices */ 185 Mat M1,M2,M3,C_CR; 186 /* working vectors */ 187 Vec vec1_C,vec2_C,vec1_V,vec2_V; 188 /* additional working stuff */ 189 IS is_aux; 190 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 191 const PetscScalar *array,*row_cmat_values; 192 const PetscInt *row_cmat_indices,*idx_R_local; 193 PetscInt *vertices,*idx_V_B,*auxindices; 194 PetscInt n_vertices,n_constraints,size_of_constraint; 195 PetscInt i,j,n_R,n_D,n_B; 196 PetscBool setsym=PETSC_FALSE,issym=PETSC_FALSE; 197 /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */ 198 MatType impMatType; 199 /* some shortcuts to scalars */ 200 PetscScalar zero=0.0,one=1.0,m_one=-1.0; 201 /* for debugging purposes */ 202 PetscReal *coarsefunctions_errors,*constraints_errors; 203 204 PetscFunctionBegin; 205 /* get number of vertices and their local indices */ 206 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 207 n_constraints = pcbddc->local_primal_size-n_vertices; 208 /* Set Non-overlapping dimensions */ 209 n_B = pcis->n_B; n_D = pcis->n - n_B; 210 n_R = pcis->n-n_vertices; 211 212 /* Set types for local objects needed by BDDC precondtioner */ 213 impMatType = MATSEQDENSE; 214 215 /* Allocating some extra storage just to be safe */ 216 ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr); 217 for (i=0;i<pcis->n;i++) auxindices[i]=i; 218 219 /* vertices in boundary numbering */ 220 ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr); 221 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr); 222 if (i != n_vertices) { 223 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i); 224 } 225 226 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 227 if (n_constraints) { 228 /* see if we can save some allocations */ 229 if (pcbddc->local_auxmat2) { 230 PetscInt on_R,on_constraints; 231 ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr); 232 if (on_R != n_R || on_constraints != n_constraints) { 233 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 234 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 235 } 236 } 237 /* work vectors */ 238 ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr); 239 ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr); 240 /* auxiliary matrices */ 241 if (!pcbddc->local_auxmat2) { 242 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr); 243 ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 244 ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr); 245 ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr); 246 } 247 248 /* Extract constraints on R nodes: C_{CR} */ 249 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 250 ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 251 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 252 253 /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */ 254 for (i=0;i<n_constraints;i++) { 255 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 256 /* Get row of constraint matrix in R numbering */ 257 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 258 ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 259 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 260 ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr); 261 ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr); 262 /* Solve for row of constraint matrix in R numbering */ 263 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 264 /* Set values in local_auxmat2 */ 265 ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 266 ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 267 ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr); 268 } 269 ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 270 ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 271 ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr); 272 273 /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc */ 274 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 275 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 276 ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr); 277 ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr); 278 ierr = MatSetType(M1,impMatType);CHKERRQ(ierr); 279 ierr = MatSetUp(M1);CHKERRQ(ierr); 280 ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 281 ierr = MatZeroEntries(M2);CHKERRQ(ierr); 282 ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr); 283 ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr); 284 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 285 ierr = MatDestroy(&M2);CHKERRQ(ierr); 286 ierr = MatDestroy(&M3);CHKERRQ(ierr); 287 /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */ 288 if (!pcbddc->local_auxmat1) { 289 ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 290 } else { 291 ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 292 } 293 } 294 295 /* Get submatrices from subdomain matrix */ 296 if (n_vertices) { 297 PetscInt ibs,mbs; 298 PetscBool issbaij; 299 Mat newmat; 300 301 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 302 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 303 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 304 if (ibs != mbs) { /* need to convert to SEQAIJ */ 305 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 306 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 307 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 308 ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 309 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 310 } else { 311 /* this is safe */ 312 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 313 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 314 if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 315 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 316 /* which of the two approaches is faster? */ 317 /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 318 ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/ 319 ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 320 ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr); 321 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 322 } else { 323 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 324 ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 325 } 326 } 327 ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr); 328 ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr); 329 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 330 } 331 332 /* Matrix of coarse basis functions (local) */ 333 if (pcbddc->coarse_phi_B) { 334 PetscInt on_B,on_primal; 335 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 336 if (on_B != n_B || on_primal != pcbddc->local_primal_size) { 337 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 338 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 339 } 340 } 341 if (pcbddc->coarse_phi_D) { 342 PetscInt on_D,on_primal; 343 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr); 344 if (on_D != n_D || on_primal != pcbddc->local_primal_size) { 345 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 346 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 347 } 348 } 349 if (!pcbddc->coarse_phi_B) { 350 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 351 ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 352 ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr); 353 ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr); 354 } 355 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) { 356 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 357 ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 358 ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr); 359 ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr); 360 } 361 362 if (pcbddc->dbg_flag) { 363 ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 364 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr); 365 ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr); 366 } 367 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 368 ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr); 369 370 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 371 372 /* vertices */ 373 for (i=0;i<n_vertices;i++) { 374 /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */ 375 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */ 376 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 377 ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr); 378 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 379 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 380 /* simplified solution of saddle point problem with null rhs on constraints multipliers */ 381 ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 382 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 383 ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr); 384 if (n_constraints) { 385 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr); 386 ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 387 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 388 } 389 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 390 ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr); 391 392 /* Set values in coarse basis function and subdomain part of coarse_mat */ 393 /* coarse basis functions */ 394 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 395 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 396 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 397 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 398 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 399 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 400 ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 401 if (pcbddc->switch_static || pcbddc->dbg_flag) { 402 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 403 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 404 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 405 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 406 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 407 } 408 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 409 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 410 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 411 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 412 if (n_constraints) { 413 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 414 ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 415 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 416 } 417 418 /* check */ 419 if (pcbddc->dbg_flag) { 420 /* assemble subdomain vector on local nodes */ 421 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 422 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 423 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 424 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 425 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 426 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 427 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 428 /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */ 429 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 430 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 431 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 433 if (n_constraints) { 434 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 435 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 436 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 437 } 438 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 439 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 440 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 441 /* check saddle point solution */ 442 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 443 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 444 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr); 445 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 446 /* shift by the identity matrix */ 447 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 448 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 449 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 450 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr); 451 } 452 } 453 454 /* constraints */ 455 for (i=0;i<n_constraints;i++) { 456 ierr = VecSet(vec2_C,zero);CHKERRQ(ierr); 457 ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 458 ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr); 459 ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr); 460 /* simplified solution of saddle point problem with null rhs on vertices multipliers */ 461 ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr); 462 ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 463 ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr); 464 if (n_vertices) { 465 ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr); 466 } 467 /* Set values in coarse basis function and subdomain part of coarse_mat */ 468 /* coarse basis functions */ 469 j = i+n_vertices; /* don't touch this! */ 470 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 471 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 472 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 473 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 474 ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 475 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 476 if (pcbddc->switch_static || pcbddc->dbg_flag) { 477 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 478 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 479 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 480 ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr); 481 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 482 } 483 /* subdomain contribution to coarse matrix. WARNING -> column major ordering */ 484 if (n_vertices) { 485 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 486 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 487 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 488 } 489 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 490 ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 491 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 492 493 if (pcbddc->dbg_flag) { 494 /* assemble subdomain vector on nodes */ 495 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 496 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 497 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 498 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 499 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 500 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 501 /* assemble subdomain vector of lagrange multipliers */ 502 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 503 if (n_vertices) { 504 ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr); 505 ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr); 506 ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr); 507 } 508 ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr); 509 ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr); 510 ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr); 511 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 512 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 513 ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr); 514 /* check saddle point solution */ 515 ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 516 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 517 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr); 518 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 519 /* shift by the identity matrix */ 520 ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr); 521 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 522 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 523 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr); 524 } 525 } 526 /* call assembling routines for local coarse basis */ 527 ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 529 if (pcbddc->switch_static || pcbddc->dbg_flag) { 530 ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 531 ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 532 } 533 534 /* compute other basis functions for non-symmetric problems */ 535 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 536 if (!setsym || (setsym && !issym)) { 537 if (!pcbddc->coarse_psi_B) { 538 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 539 ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr); 540 ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr); 541 ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr); 542 } 543 if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) { 544 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 545 ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr); 546 ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr); 547 ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr); 548 } 549 for (i=0;i<pcbddc->local_primal_size;i++) { 550 if (n_constraints) { 551 ierr = VecSet(vec1_C,zero);CHKERRQ(ierr); 552 for (j=0;j<n_constraints;j++) { 553 ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 554 } 555 ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr); 556 ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr); 557 } 558 if (i<n_vertices) { 559 ierr = VecSet(vec1_V,zero);CHKERRQ(ierr); 560 ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr); 561 ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr); 562 ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr); 563 ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr); 564 if (n_constraints) { 565 ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 566 } 567 } else { 568 ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr); 569 } 570 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 571 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 572 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 573 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 574 ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 575 ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 576 ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr); 577 if (i<n_vertices) { 578 ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr); 579 } 580 if (pcbddc->switch_static || pcbddc->dbg_flag) { 581 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 582 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 583 ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 584 ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 585 ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr); 586 } 587 588 if (pcbddc->dbg_flag) { 589 /* assemble subdomain vector on nodes */ 590 ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr); 591 ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 592 ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr); 593 ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr); 594 if (i<n_vertices) { 595 ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr); 596 } 597 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 598 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 599 /* assemble subdomain vector of lagrange multipliers */ 600 for (j=0;j<pcbddc->local_primal_size;j++) { 601 ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr); 602 } 603 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 604 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 605 /* check saddle point solution */ 606 ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 607 ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr); 608 ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 609 ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr); 610 /* shift by the identity matrix */ 611 ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr); 612 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 613 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 614 ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr); 615 } 616 } 617 ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 618 ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 619 if (pcbddc->switch_static || pcbddc->dbg_flag) { 620 ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 621 ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 622 } 623 } 624 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 625 /* Checking coarse_sub_mat and coarse basis functios */ 626 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 627 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 628 if (pcbddc->dbg_flag) { 629 Mat coarse_sub_mat; 630 Mat AUXMAT,TM1,TM2,TM3,TM4; 631 Mat coarse_phi_D,coarse_phi_B; 632 Mat coarse_psi_D,coarse_psi_B; 633 Mat A_II,A_BB,A_IB,A_BI; 634 MatType checkmattype=MATSEQAIJ; 635 PetscReal real_value; 636 637 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 638 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 639 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 640 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 641 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 642 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 643 if (pcbddc->coarse_psi_B) { 644 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 645 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 646 } 647 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 648 649 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 650 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr); 651 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 652 if (pcbddc->coarse_psi_B) { 653 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 654 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 655 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 656 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 657 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 658 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 659 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 660 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 661 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 662 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 663 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 664 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 665 } else { 666 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 667 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 668 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 669 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 670 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 671 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 672 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 673 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 674 } 675 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 676 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 677 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 678 ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr); 679 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 680 ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr); 681 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 682 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr); 683 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr); 684 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr); 685 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr); 686 for (i=0;i<pcbddc->local_primal_size;i++) { 687 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr); 688 } 689 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr); 690 for (i=0;i<pcbddc->local_primal_size;i++) { 691 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr); 692 } 693 if (pcbddc->coarse_psi_B) { 694 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr); 695 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 696 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr); 697 } 698 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr); 699 for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) { 700 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr); 701 } 702 } 703 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 704 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 705 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 706 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 707 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 708 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 709 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 710 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 711 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 712 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 713 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 714 if (pcbddc->coarse_psi_B) { 715 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 716 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 717 } 718 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 719 ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr); 720 ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr); 721 ierr = PetscFree(constraints_errors);CHKERRQ(ierr); 722 } 723 /* free memory */ 724 if (n_vertices) { 725 ierr = PetscFree(vertices);CHKERRQ(ierr); 726 ierr = VecDestroy(&vec1_V);CHKERRQ(ierr); 727 ierr = VecDestroy(&vec2_V);CHKERRQ(ierr); 728 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 729 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 730 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 731 } 732 if (n_constraints) { 733 ierr = VecDestroy(&vec1_C);CHKERRQ(ierr); 734 ierr = VecDestroy(&vec2_C);CHKERRQ(ierr); 735 ierr = MatDestroy(&M1);CHKERRQ(ierr); 736 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 737 } 738 ierr = PetscFree(auxindices);CHKERRQ(ierr); 739 /* get back data */ 740 *coarse_submat_vals_n = coarse_submat_vals; 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "PCBDDCSetUpLocalMatrices" 746 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc) 747 { 748 PC_IS* pcis = (PC_IS*)(pc->data); 749 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 750 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 751 PetscBool issbaij,isbaij; 752 /* manage repeated solves */ 753 MatReuse reuse; 754 MatStructure matstruct; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) { 759 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created"); 760 } 761 /* get mat flags */ 762 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 763 reuse = MAT_INITIAL_MATRIX; 764 if (pc->setupcalled) { 765 /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */ 766 if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen"); 767 if (matstruct == SAME_NONZERO_PATTERN) { 768 reuse = MAT_REUSE_MATRIX; 769 } else { 770 reuse = MAT_INITIAL_MATRIX; 771 } 772 } 773 if (reuse == MAT_INITIAL_MATRIX) { 774 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 775 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 776 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 777 ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr); 778 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 779 } 780 781 /* transform local matrices if needed */ 782 if (pcbddc->use_change_of_basis) { 783 Mat change_mat_all; 784 PetscScalar *row_cmat_values; 785 PetscInt *row_cmat_indices; 786 PetscInt *nnz,*is_indices,*temp_indices; 787 PetscInt i,j,k,n_D,n_B; 788 789 /* Get Non-overlapping dimensions */ 790 n_B = pcis->n_B; 791 n_D = pcis->n-n_B; 792 793 /* compute nonzero structure of change of basis on all local nodes */ 794 ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 795 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 796 for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1; 797 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 798 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 799 k=1; 800 for (i=0;i<n_B;i++) { 801 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 802 nnz[is_indices[i]]=j; 803 if (k < j) k = j; 804 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr); 805 } 806 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 807 /* assemble change of basis matrix on the whole set of local dofs */ 808 ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 809 ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr); 810 ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 811 ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr); 812 ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr); 813 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 814 for (i=0;i<n_D;i++) { 815 ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 816 } 817 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 818 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 819 for (i=0;i<n_B;i++) { 820 ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 821 for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]]; 822 ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr); 823 ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr); 824 } 825 ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 826 ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 827 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */ 828 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 829 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr); 830 if (!issbaij && !isbaij) { 831 ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 832 } else { 833 Mat work_mat; 834 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 835 ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 836 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 837 } 838 /* 839 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 840 ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr); 841 */ 842 ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr); 843 ierr = PetscFree(nnz);CHKERRQ(ierr); 844 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 845 } else { 846 /* without change of basis, the local matrix is unchanged */ 847 if (!pcbddc->local_mat) { 848 ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr); 849 pcbddc->local_mat = matis->A; 850 } 851 } 852 853 /* get submatrices */ 854 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr); 855 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr); 856 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 857 if (!issbaij) { 858 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 859 ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 860 } else { 861 Mat newmat; 862 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 863 ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr); 864 ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr); 865 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 866 } 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "PCBDDCSetUpLocalScatters" 872 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 873 { 874 PC_IS* pcis = (PC_IS*)(pc->data); 875 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 876 IS is_aux1,is_aux2; 877 PetscInt *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local; 878 PetscInt n_vertices,i,j,n_R,n_D,n_B; 879 PetscInt vbs,bs; 880 PetscBT bitmask; 881 PetscErrorCode ierr; 882 883 PetscFunctionBegin; 884 /* No need to setup local scatters if primal space is unchanged */ 885 if (!pcbddc->new_primal_space_local) { 886 PetscFunctionReturn(0); 887 } 888 /* destroy old objects */ 889 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 890 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 891 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 892 /* Set Non-overlapping dimensions */ 893 n_B = pcis->n_B; n_D = pcis->n - n_B; 894 /* get vertex indices from constraint matrix */ 895 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr); 896 897 /* create auxiliary bitmask */ 898 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 899 for (i=0;i<n_vertices;i++) { 900 ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr); 901 } 902 903 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 904 ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr); 905 for (i=0, n_R=0; i<pcis->n; i++) { 906 if (!PetscBTLookup(bitmask,i)) { 907 idx_R_local[n_R] = i; 908 n_R++; 909 } 910 } 911 912 /* Block code */ 913 vbs = 1; 914 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 915 if (bs>1 && !(n_vertices%bs)) { 916 PetscBool is_blocked = PETSC_TRUE; 917 PetscInt *vary; 918 /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 919 ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr); 920 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 921 for (i=0; i<n_vertices; i++) vary[vertices[i]/bs]++; 922 for (i=0; i<n_vertices; i++) { 923 if (vary[i]!=0 && vary[i]!=bs) { 924 is_blocked = PETSC_FALSE; 925 break; 926 } 927 } 928 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 929 vbs = bs; 930 for (i=0;i<n_R/vbs;i++) { 931 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 932 } 933 } 934 ierr = PetscFree(vary);CHKERRQ(ierr); 935 } 936 ierr = PetscFree(vertices);CHKERRQ(ierr); 937 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 938 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 939 940 /* print some info if requested */ 941 if (pcbddc->dbg_flag) { 942 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 944 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 945 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 946 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 947 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr); 948 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr); 949 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 950 } 951 952 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 953 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 954 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 955 ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr); 956 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 957 for (i=0; i<n_D; i++) { 958 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 959 } 960 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 961 for (i=0, j=0; i<n_R; i++) { 962 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 963 aux_array1[j++] = i; 964 } 965 } 966 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 967 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 968 for (i=0, j=0; i<n_B; i++) { 969 if (!PetscBTLookup(bitmask,is_indices[i])) { 970 aux_array2[j++] = i; 971 } 972 } 973 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 974 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 975 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 976 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 977 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 978 979 if (pcbddc->switch_static || pcbddc->dbg_flag) { 980 ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr); 981 for (i=0, j=0; i<n_R; i++) { 982 if (PetscBTLookup(bitmask,idx_R_local[i])) { 983 aux_array1[j++] = i; 984 } 985 } 986 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 987 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 988 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 989 } 990 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 991 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 992 PetscFunctionReturn(0); 993 } 994 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "PCBDDCSetUpLocalSolvers" 998 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc) 999 { 1000 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1001 PC_IS *pcis = (PC_IS*)pc->data; 1002 PC pc_temp; 1003 Mat A_RR; 1004 MatStructure matstruct; 1005 MatReuse reuse; 1006 PetscScalar m_one = -1.0; 1007 PetscReal value; 1008 PetscInt n_D,n_R,ibs,mbs; 1009 PetscBool use_exact,use_exact_reduced,issbaij; 1010 PetscErrorCode ierr; 1011 /* prefixes stuff */ 1012 char dir_prefix[256],neu_prefix[256],str_level[3]; 1013 size_t len; 1014 1015 PetscFunctionBegin; 1016 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 1017 1018 /* compute prefixes */ 1019 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 1020 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 1021 if (!pcbddc->current_level) { 1022 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1023 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 1024 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1025 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1026 } else { 1027 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 1028 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 1029 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 1030 len -= 15; /* remove "pc_bddc_coarse_" */ 1031 if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */ 1032 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1033 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 1034 *(dir_prefix+len)='\0'; 1035 *(neu_prefix+len)='\0'; 1036 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 1037 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 1038 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 1039 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 1040 } 1041 1042 /* DIRICHLET PROBLEM */ 1043 /* Matrix for Dirichlet problem is pcis->A_II */ 1044 ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr); 1045 if (!pcbddc->ksp_D) { /* create object if not yet build */ 1046 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 1047 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 1048 /* default */ 1049 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 1050 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 1051 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1052 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1053 if (issbaij) { 1054 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1055 } else { 1056 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1057 } 1058 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1059 } 1060 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr); 1061 /* Allow user's customization */ 1062 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 1063 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1064 if (!n_D) { 1065 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 1066 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1067 } 1068 /* Set Up KSP for Dirichlet problem of BDDC */ 1069 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 1070 /* set ksp_D into pcis data */ 1071 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 1072 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 1073 pcis->ksp_D = pcbddc->ksp_D; 1074 1075 /* NEUMANN PROBLEM */ 1076 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 1077 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 1078 if (pcbddc->ksp_R) { /* already created ksp */ 1079 PetscInt nn_R; 1080 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr); 1081 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 1082 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 1083 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 1084 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 1085 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1086 reuse = MAT_INITIAL_MATRIX; 1087 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 1088 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 1089 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1090 reuse = MAT_INITIAL_MATRIX; 1091 } else { /* safe to reuse the matrix */ 1092 reuse = MAT_REUSE_MATRIX; 1093 } 1094 } 1095 /* last check */ 1096 if (matstruct == DIFFERENT_NONZERO_PATTERN) { 1097 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1098 reuse = MAT_INITIAL_MATRIX; 1099 } 1100 } else { /* first time, so we need to create the matrix */ 1101 reuse = MAT_INITIAL_MATRIX; 1102 } 1103 /* extract A_RR */ 1104 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 1105 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 1106 if (ibs != mbs) { 1107 Mat newmat; 1108 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr); 1109 ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1110 ierr = MatDestroy(&newmat);CHKERRQ(ierr); 1111 } else { 1112 ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 1113 } 1114 if (!pcbddc->ksp_R) { /* create object if not present */ 1115 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 1116 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 1117 /* default */ 1118 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 1119 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 1120 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1121 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 1122 if (issbaij) { 1123 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 1124 } else { 1125 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 1126 } 1127 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 1128 } 1129 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr); 1130 /* Allow user's customization */ 1131 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 1132 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 1133 if (!n_R) { 1134 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 1135 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 1136 } 1137 /* Set Up KSP for Neumann problem of BDDC */ 1138 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 1139 1140 /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */ 1141 if (pcbddc->NullSpace || pcbddc->dbg_flag) { 1142 /* Dirichlet */ 1143 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 1144 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1145 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 1146 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 1147 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 1148 /* need to be adapted? */ 1149 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1150 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1151 ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr); 1152 /* print info */ 1153 if (pcbddc->dbg_flag) { 1154 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1155 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1156 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 1157 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr); 1158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 1159 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1160 } 1161 if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) { 1162 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr); 1163 } 1164 1165 /* Neumann */ 1166 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 1167 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1168 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1169 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 1170 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 1171 /* need to be adapted? */ 1172 use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE); 1173 ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 1174 /* print info */ 1175 if (pcbddc->dbg_flag) { 1176 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 1177 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1178 } 1179 if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */ 1180 ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr); 1181 } 1182 } 1183 /* free Neumann problem's matrix */ 1184 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "PCBDDCSolveSaddlePoint" 1190 static PetscErrorCode PCBDDCSolveSaddlePoint(PC pc) 1191 { 1192 PetscErrorCode ierr; 1193 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1194 1195 PetscFunctionBegin; 1196 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 1197 if (pcbddc->local_auxmat1) { 1198 ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr); 1199 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 1200 } 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner" 1206 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc) 1207 { 1208 PetscErrorCode ierr; 1209 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1210 PC_IS* pcis = (PC_IS*) (pc->data); 1211 const PetscScalar zero = 0.0; 1212 1213 PetscFunctionBegin; 1214 /* Application of PHI^T (or PSI^T) */ 1215 if (pcbddc->coarse_psi_B) { 1216 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1217 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1218 } else { 1219 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 1220 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 1221 } 1222 /* Scatter data of coarse_rhs */ 1223 if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); } 1224 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1225 1226 /* Local solution on R nodes */ 1227 ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); 1228 ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1229 ierr = VecScatterEnd (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 if (pcbddc->switch_static) { 1231 ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1232 ierr = VecScatterEnd (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1233 } 1234 ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr); 1235 ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr); 1236 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1237 ierr = VecScatterEnd (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1238 if (pcbddc->switch_static) { 1239 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1240 ierr = VecScatterEnd (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1241 } 1242 1243 /* Coarse solution */ 1244 ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1245 if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */ 1246 ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 1247 } 1248 ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1249 ierr = PCBDDCScatterCoarseDataEnd (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1250 1251 /* Sum contributions from two levels */ 1252 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 1253 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #undef __FUNCT__ 1258 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin" 1259 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1260 { 1261 PetscErrorCode ierr; 1262 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1263 1264 PetscFunctionBegin; 1265 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd" 1271 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode) 1272 { 1273 PetscErrorCode ierr; 1274 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 1275 1276 PetscFunctionBegin; 1277 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 /* uncomment for testing purposes */ 1282 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "PCBDDCConstraintsSetUp" 1285 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 1286 { 1287 PetscErrorCode ierr; 1288 PC_IS* pcis = (PC_IS*)(pc->data); 1289 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 1290 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 1291 /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */ 1292 MatType impMatType=MATSEQAIJ; 1293 /* one and zero */ 1294 PetscScalar one=1.0,zero=0.0; 1295 /* space to store constraints and their local indices */ 1296 PetscScalar *temp_quadrature_constraint; 1297 PetscInt *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B; 1298 /* iterators */ 1299 PetscInt i,j,k,total_counts,temp_start_ptr; 1300 /* stuff to store connected components stored in pcbddc->mat_graph */ 1301 IS ISForVertices,*ISForFaces,*ISForEdges,*used_IS; 1302 PetscInt n_ISForFaces,n_ISForEdges; 1303 /* near null space stuff */ 1304 MatNullSpace nearnullsp; 1305 const Vec *nearnullvecs; 1306 Vec *localnearnullsp; 1307 PetscBool nnsp_has_cnst; 1308 PetscInt nnsp_size; 1309 PetscScalar *array; 1310 /* BLAS integers */ 1311 PetscBLASInt lwork,lierr; 1312 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 1313 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 1314 /* LAPACK working arrays for SVD or POD */ 1315 PetscBool skip_lapack; 1316 PetscScalar *work; 1317 PetscReal *singular_vals; 1318 #if defined(PETSC_USE_COMPLEX) 1319 PetscReal *rwork; 1320 #endif 1321 #if defined(PETSC_MISSING_LAPACK_GESVD) 1322 PetscBLASInt Blas_one_2=1; 1323 PetscScalar *temp_basis,*correlation_mat; 1324 #else 1325 PetscBLASInt dummy_int_1=1,dummy_int_2=1; 1326 PetscScalar dummy_scalar_1=0.0,dummy_scalar_2=0.0; 1327 #endif 1328 /* reuse */ 1329 PetscInt olocal_primal_size; 1330 PetscInt *oprimal_indices_local_idxs; 1331 /* change of basis */ 1332 PetscInt *aux_primal_numbering,*aux_primal_minloc,*global_indices; 1333 PetscBool boolforchange,qr_needed; 1334 PetscBT touched,change_basis,qr_needed_idx; 1335 /* auxiliary stuff */ 1336 PetscInt *nnz,*is_indices,*aux_primal_numbering_B; 1337 /* some quantities */ 1338 PetscInt n_vertices,total_primal_vertices,valid_constraints; 1339 PetscInt size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints; 1340 1341 1342 PetscFunctionBegin; 1343 /* Destroy Mat objects computed previously */ 1344 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1345 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1346 /* Get index sets for faces, edges and vertices from graph */ 1347 if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) { 1348 pcbddc->use_vertices = PETSC_TRUE; 1349 } 1350 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices); 1351 /* HACK: provide functions to set change of basis */ 1352 if (!ISForVertices && pcbddc->NullSpace) { 1353 pcbddc->use_change_of_basis = PETSC_TRUE; 1354 pcbddc->use_change_on_faces = PETSC_FALSE; 1355 } 1356 /* print some info */ 1357 if (pcbddc->dbg_flag) { 1358 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 1359 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1360 i = 0; 1361 if (ISForVertices) { 1362 ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr); 1363 } 1364 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr); 1365 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr); 1366 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr); 1367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 1368 } 1369 /* check if near null space is attached to global mat */ 1370 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 1371 if (nearnullsp) { 1372 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 1373 /* remove any stored info */ 1374 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 1375 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1376 /* store information for BDDC solver reuse */ 1377 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 1378 pcbddc->onearnullspace = nearnullsp; 1379 ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 1380 for (i=0;i<nnsp_size;i++) { 1381 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 1382 } 1383 } else { /* if near null space is not provided BDDC uses constants by default */ 1384 nnsp_size = 0; 1385 nnsp_has_cnst = PETSC_TRUE; 1386 } 1387 /* get max number of constraints on a single cc */ 1388 max_constraints = nnsp_size; 1389 if (nnsp_has_cnst) max_constraints++; 1390 1391 /* 1392 Evaluate maximum storage size needed by the procedure 1393 - temp_indices will contain start index of each constraint stored as follows 1394 - temp_indices_to_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts 1395 - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts 1396 - temp_quadrature_constraint [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself 1397 */ 1398 total_counts = n_ISForFaces+n_ISForEdges; 1399 total_counts *= max_constraints; 1400 n_vertices = 0; 1401 if (ISForVertices) { 1402 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 1403 } 1404 total_counts += n_vertices; 1405 ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr); 1406 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 1407 total_counts = 0; 1408 max_size_of_constraint = 0; 1409 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1410 if (i<n_ISForEdges) { 1411 used_IS = &ISForEdges[i]; 1412 } else { 1413 used_IS = &ISForFaces[i-n_ISForEdges]; 1414 } 1415 ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr); 1416 total_counts += j; 1417 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 1418 } 1419 total_counts *= max_constraints; 1420 total_counts += n_vertices; 1421 ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr); 1422 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr); 1423 ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr); 1424 /* get local part of global near null space vectors */ 1425 ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr); 1426 for (k=0;k<nnsp_size;k++) { 1427 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 1428 ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1429 ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1430 } 1431 1432 /* whether or not to skip lapack calls */ 1433 skip_lapack = PETSC_TRUE; 1434 if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE; 1435 1436 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 1437 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1438 PetscScalar temp_work; 1439 #if defined(PETSC_MISSING_LAPACK_GESVD) 1440 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 1441 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr); 1442 ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1443 ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr); 1444 #if defined(PETSC_USE_COMPLEX) 1445 ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1446 #endif 1447 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1448 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1449 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 1450 lwork = -1; 1451 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1452 #if !defined(PETSC_USE_COMPLEX) 1453 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 1454 #else 1455 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 1456 #endif 1457 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1458 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 1459 #else /* on missing GESVD */ 1460 /* SVD */ 1461 PetscInt max_n,min_n; 1462 max_n = max_size_of_constraint; 1463 min_n = max_constraints; 1464 if (max_size_of_constraint < max_constraints) { 1465 min_n = max_size_of_constraint; 1466 max_n = max_constraints; 1467 } 1468 ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr); 1469 #if defined(PETSC_USE_COMPLEX) 1470 ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr); 1471 #endif 1472 /* now we evaluate the optimal workspace using query with lwork=-1 */ 1473 lwork = -1; 1474 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 1475 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 1476 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 1477 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1478 #if !defined(PETSC_USE_COMPLEX) 1479 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr)); 1480 #else 1481 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr)); 1482 #endif 1483 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1484 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 1485 #endif /* on missing GESVD */ 1486 /* Allocate optimal workspace */ 1487 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 1488 ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr); 1489 } 1490 /* Now we can loop on constraining sets */ 1491 total_counts = 0; 1492 temp_indices[0] = 0; 1493 /* vertices */ 1494 if (ISForVertices) { 1495 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1496 if (nnsp_has_cnst) { /* consider all vertices */ 1497 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 1498 for (i=0;i<n_vertices;i++) { 1499 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1500 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1501 total_counts++; 1502 } 1503 } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */ 1504 PetscBool used_vertex; 1505 for (i=0;i<n_vertices;i++) { 1506 used_vertex = PETSC_FALSE; 1507 k = 0; 1508 while (!used_vertex && k<nnsp_size) { 1509 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1510 if (PetscAbsScalar(array[is_indices[i]])>0.0) { 1511 temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i]; 1512 temp_quadrature_constraint[temp_indices[total_counts]]=1.0; 1513 temp_indices[total_counts+1]=temp_indices[total_counts]+1; 1514 total_counts++; 1515 used_vertex = PETSC_TRUE; 1516 } 1517 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1518 k++; 1519 } 1520 } 1521 } 1522 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1523 n_vertices = total_counts; 1524 } 1525 1526 /* edges and faces */ 1527 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 1528 if (i<n_ISForEdges) { 1529 used_IS = &ISForEdges[i]; 1530 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 1531 } else { 1532 used_IS = &ISForFaces[i-n_ISForEdges]; 1533 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 1534 } 1535 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 1536 temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */ 1537 ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr); 1538 ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1539 /* change of basis should not be performed on local periodic nodes */ 1540 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 1541 if (nnsp_has_cnst) { 1542 PetscScalar quad_value; 1543 temp_constraints++; 1544 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 1545 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1546 for (j=0;j<size_of_constraint;j++) { 1547 temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value; 1548 } 1549 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1550 total_counts++; 1551 } 1552 for (k=0;k<nnsp_size;k++) { 1553 PetscReal real_value; 1554 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1555 ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 1556 for (j=0;j<size_of_constraint;j++) { 1557 temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]]; 1558 } 1559 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 1560 /* check if array is null on the connected component */ 1561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1562 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one)); 1563 if (real_value > 0.0) { /* keep indices and values */ 1564 temp_constraints++; 1565 temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint; /* store new starting point */ 1566 total_counts++; 1567 } 1568 } 1569 ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr); 1570 valid_constraints = temp_constraints; 1571 /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */ 1572 if (!pcbddc->use_nnsp_true && temp_constraints) { 1573 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 1574 1575 #if defined(PETSC_MISSING_LAPACK_GESVD) 1576 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 1577 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 1578 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 1579 the constraints basis will differ (by a complex factor with absolute value equal to 1) 1580 from that computed using LAPACKgesvd 1581 -> This is due to a different computation of eigenvectors in LAPACKheev 1582 -> The quality of the POD-computed basis will be the same */ 1583 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 1584 /* Store upper triangular part of correlation matrix */ 1585 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1586 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1587 for (j=0;j<temp_constraints;j++) { 1588 for (k=0;k<j+1;k++) { 1589 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2)); 1590 } 1591 } 1592 /* compute eigenvalues and eigenvectors of correlation matrix */ 1593 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1594 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 1595 #if !defined(PETSC_USE_COMPLEX) 1596 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 1597 #else 1598 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 1599 #endif 1600 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1601 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 1602 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 1603 j=0; 1604 while (j < temp_constraints && singular_vals[j] < tol) j++; 1605 total_counts=total_counts-j; 1606 valid_constraints = temp_constraints-j; 1607 /* scale and copy POD basis into used quadrature memory */ 1608 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1609 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1610 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 1611 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1612 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 1613 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1614 if (j<temp_constraints) { 1615 PetscInt ii; 1616 for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]); 1617 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1618 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 1619 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1620 for (k=0;k<temp_constraints-j;k++) { 1621 for (ii=0;ii<size_of_constraint;ii++) { 1622 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 1623 } 1624 } 1625 } 1626 #else /* on missing GESVD */ 1627 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1628 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 1629 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1630 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1631 #if !defined(PETSC_USE_COMPLEX) 1632 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr)); 1633 #else 1634 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr)); 1635 #endif 1636 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 1637 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1638 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 1639 k = temp_constraints; 1640 if (k > size_of_constraint) k = size_of_constraint; 1641 j = 0; 1642 while (j < k && singular_vals[k-j-1] < tol) j++; 1643 total_counts = total_counts-temp_constraints+k-j; 1644 valid_constraints = k-j; 1645 #endif /* on missing GESVD */ 1646 } 1647 /* setting change_of_basis flag is safe now */ 1648 if (boolforchange) { 1649 for (j=0;j<valid_constraints;j++) { 1650 PetscBTSet(change_basis,total_counts-j-1); 1651 } 1652 } 1653 } 1654 /* free index sets of faces, edges and vertices */ 1655 for (i=0;i<n_ISForFaces;i++) { 1656 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 1657 } 1658 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 1659 for (i=0;i<n_ISForEdges;i++) { 1660 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 1661 } 1662 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 1663 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 1664 /* map temp_indices_to_constraint in boundary numbering */ 1665 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr); 1666 if (i != temp_indices[total_counts]) { 1667 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i); 1668 } 1669 1670 /* free workspace */ 1671 if (!pcbddc->use_nnsp_true && !skip_lapack) { 1672 ierr = PetscFree(work);CHKERRQ(ierr); 1673 #if defined(PETSC_USE_COMPLEX) 1674 ierr = PetscFree(rwork);CHKERRQ(ierr); 1675 #endif 1676 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 1677 #if defined(PETSC_MISSING_LAPACK_GESVD) 1678 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 1679 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 1680 #endif 1681 } 1682 for (k=0;k<nnsp_size;k++) { 1683 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 1684 } 1685 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 1686 1687 /* set quantities in pcbddc data structure and store previous primal size */ 1688 /* n_vertices defines the number of subdomain corners in the primal space */ 1689 /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */ 1690 olocal_primal_size = pcbddc->local_primal_size; 1691 pcbddc->local_primal_size = total_counts; 1692 pcbddc->n_vertices = n_vertices; 1693 pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices; 1694 1695 /* Create constraint matrix */ 1696 /* The constraint matrix is used to compute the l2g map of primal dofs */ 1697 /* so we need to set it up properly either with or without change of basis */ 1698 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 1699 ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr); 1700 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 1701 /* array to compute a local numbering of constraints : vertices first then constraints */ 1702 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr); 1703 /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */ 1704 /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */ 1705 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr); 1706 /* auxiliary stuff for basis change */ 1707 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr); 1708 ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr); 1709 1710 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 1711 total_primal_vertices=0; 1712 for (i=0;i<pcbddc->local_primal_size;i++) { 1713 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1714 if (size_of_constraint == 1) { 1715 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr); 1716 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]]; 1717 aux_primal_minloc[total_primal_vertices]=0; 1718 total_primal_vertices++; 1719 } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */ 1720 PetscInt min_loc,min_index; 1721 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr); 1722 /* find first untouched local node */ 1723 k = 0; 1724 while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++; 1725 min_index = global_indices[k]; 1726 min_loc = k; 1727 /* search the minimum among global nodes already untouched on the cc */ 1728 for (k=1;k<size_of_constraint;k++) { 1729 /* there can be more than one constraint on a single connected component */ 1730 if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) { 1731 min_index = global_indices[k]; 1732 min_loc = k; 1733 } 1734 } 1735 ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr); 1736 aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc]; 1737 aux_primal_minloc[total_primal_vertices]=min_loc; 1738 total_primal_vertices++; 1739 } 1740 } 1741 /* determine if a QR strategy is needed for change of basis */ 1742 qr_needed = PETSC_FALSE; 1743 ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr); 1744 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1745 if (PetscBTLookup(change_basis,i)) { 1746 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1747 j = 0; 1748 for (k=0;k<size_of_constraint;k++) { 1749 if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) { 1750 j++; 1751 } 1752 } 1753 /* found more than one primal dof on the cc */ 1754 if (j > 1) { 1755 PetscBTSet(qr_needed_idx,i); 1756 qr_needed = PETSC_TRUE; 1757 } 1758 } 1759 } 1760 /* free workspace */ 1761 ierr = PetscFree(global_indices);CHKERRQ(ierr); 1762 1763 /* permute indices in order to have a sorted set of vertices */ 1764 ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr); 1765 1766 /* nonzero structure of constraint matrix */ 1767 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1768 for (i=0;i<total_primal_vertices;i++) nnz[i]=1; 1769 j=total_primal_vertices; 1770 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1771 if (!PetscBTLookup(change_basis,i)) { 1772 nnz[j]=temp_indices[i+1]-temp_indices[i]; 1773 j++; 1774 } 1775 } 1776 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 1777 ierr = PetscFree(nnz);CHKERRQ(ierr); 1778 /* set values in constraint matrix */ 1779 for (i=0;i<total_primal_vertices;i++) { 1780 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 1781 } 1782 total_counts = total_primal_vertices; 1783 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1784 if (!PetscBTLookup(change_basis,i)) { 1785 size_of_constraint=temp_indices[i+1]-temp_indices[i]; 1786 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr); 1787 total_counts++; 1788 } 1789 } 1790 /* assembling */ 1791 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1792 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1793 /* 1794 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 1795 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 1796 */ 1797 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 1798 if (pcbddc->use_change_of_basis) { 1799 /* dual and primal dofs on a single cc */ 1800 PetscInt dual_dofs,primal_dofs; 1801 /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */ 1802 PetscInt primal_counter; 1803 /* working stuff for GEQRF */ 1804 PetscScalar *qr_basis,*qr_tau,*qr_work,lqr_work_t; 1805 PetscBLASInt lqr_work; 1806 /* working stuff for UNGQR */ 1807 PetscScalar *gqr_work,lgqr_work_t; 1808 PetscBLASInt lgqr_work; 1809 /* working stuff for TRTRS */ 1810 PetscScalar *trs_rhs; 1811 PetscBLASInt Blas_NRHS; 1812 /* pointers for values insertion into change of basis matrix */ 1813 PetscInt *start_rows,*start_cols; 1814 PetscScalar *start_vals; 1815 /* working stuff for values insertion */ 1816 PetscBT is_primal; 1817 1818 /* change of basis acts on local interfaces -> dimension is n_B x n_B */ 1819 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 1820 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr); 1821 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr); 1822 /* work arrays */ 1823 ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr); 1824 for (i=0;i<pcis->n_B;i++) nnz[i]=1; 1825 /* nonzeros per row */ 1826 for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) { 1827 if (PetscBTLookup(change_basis,i)) { 1828 size_of_constraint = temp_indices[i+1]-temp_indices[i]; 1829 if (PetscBTLookup(qr_needed_idx,i)) { 1830 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1831 } else { 1832 for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2; 1833 /* get local primal index on the cc */ 1834 j = 0; 1835 while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++; 1836 nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint; 1837 } 1838 } 1839 } 1840 ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 1841 ierr = PetscFree(nnz);CHKERRQ(ierr); 1842 /* Set initial identity in the matrix */ 1843 for (i=0;i<pcis->n_B;i++) { 1844 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 1845 } 1846 1847 if (pcbddc->dbg_flag) { 1848 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 1849 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 1850 } 1851 1852 1853 /* Now we loop on the constraints which need a change of basis */ 1854 /* 1855 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 1856 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 1857 1858 Basic blocks of change of basis matrix T computed 1859 1860 - Using the following block transformation if there is only a primal dof on the cc 1861 (in the example, primal dof is the last one of the edge in LOCAL ordering 1862 in this code, primal dof is the first one of the edge in GLOBAL ordering) 1863 | 1 0 ... 0 1 | 1864 | 0 1 ... 0 1 | 1865 | ... | 1866 | 0 ... 1 1 | 1867 | -s_1/s_n ... -s_{n-1}/-s_n 1 | 1868 1869 - via QR decomposition of constraints otherwise 1870 */ 1871 if (qr_needed) { 1872 /* space to store Q */ 1873 ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr); 1874 /* first we issue queries for optimal work */ 1875 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1876 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 1877 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1878 lqr_work = -1; 1879 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 1880 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 1881 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 1882 ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr); 1883 lgqr_work = -1; 1884 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 1885 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 1886 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 1887 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1888 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 1889 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 1890 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 1891 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 1892 ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr); 1893 /* array to store scaling factors for reflectors */ 1894 ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr); 1895 /* array to store rhs and solution of triangular solver */ 1896 ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr); 1897 /* allocating workspace for check */ 1898 if (pcbddc->dbg_flag) { 1899 ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr); 1900 } 1901 } 1902 /* array to store whether a node is primal or not */ 1903 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 1904 ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr); 1905 ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr); 1906 if (i != total_primal_vertices) { 1907 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i); 1908 } 1909 for (i=0;i<total_primal_vertices;i++) { 1910 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 1911 } 1912 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 1913 1914 /* loop on constraints and see whether or not they need a change of basis and compute it */ 1915 /* -> using implicit ordering contained in temp_indices data */ 1916 total_counts = pcbddc->n_vertices; 1917 primal_counter = total_counts; 1918 while (total_counts<pcbddc->local_primal_size) { 1919 primal_dofs = 1; 1920 if (PetscBTLookup(change_basis,total_counts)) { 1921 /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */ 1922 while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) { 1923 primal_dofs++; 1924 } 1925 /* get constraint info */ 1926 size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts]; 1927 dual_dofs = size_of_constraint-primal_dofs; 1928 1929 if (pcbddc->dbg_flag) { 1930 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr); 1931 } 1932 1933 if (primal_dofs > 1) { /* QR */ 1934 1935 /* copy quadrature constraints for change of basis check */ 1936 if (pcbddc->dbg_flag) { 1937 ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1938 } 1939 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 1940 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1941 1942 /* compute QR decomposition of constraints */ 1943 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1944 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1946 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1947 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 1948 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 1949 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1950 1951 /* explictly compute R^-T */ 1952 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 1953 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 1954 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1955 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 1956 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1957 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1958 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1959 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 1960 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 1961 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1962 1963 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 1964 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1965 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 1966 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1968 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1969 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 1970 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 1971 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1972 1973 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 1974 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 1975 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 1976 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 1977 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 1978 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 1979 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 1980 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 1981 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 1982 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 1983 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC)); 1984 ierr = PetscFPTrapPop();CHKERRQ(ierr); 1985 ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 1986 1987 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 1988 start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]]; 1989 /* insert cols for primal dofs */ 1990 for (j=0;j<primal_dofs;j++) { 1991 start_vals = &qr_basis[j*size_of_constraint]; 1992 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]]; 1993 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 1994 } 1995 /* insert cols for dual dofs */ 1996 for (j=0,k=0;j<dual_dofs;k++) { 1997 if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) { 1998 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 1999 start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2000 ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 2001 j++; 2002 } 2003 } 2004 2005 /* check change of basis */ 2006 if (pcbddc->dbg_flag) { 2007 PetscInt ii,jj; 2008 PetscBool valid_qr=PETSC_TRUE; 2009 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 2010 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 2011 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 2012 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 2013 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 2014 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 2015 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 2016 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC)); 2017 ierr = PetscFPTrapPop();CHKERRQ(ierr); 2018 for (jj=0;jj<size_of_constraint;jj++) { 2019 for (ii=0;ii<primal_dofs;ii++) { 2020 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 2021 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 2022 } 2023 } 2024 if (!valid_qr) { 2025 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2026 for (jj=0;jj<size_of_constraint;jj++) { 2027 for (ii=0;ii<primal_dofs;ii++) { 2028 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 2029 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2030 } 2031 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 2032 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 2033 } 2034 } 2035 } 2036 } else { 2037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr); 2038 } 2039 } 2040 } else { /* simple transformation block */ 2041 PetscInt row,col; 2042 PetscScalar val; 2043 for (j=0;j<size_of_constraint;j++) { 2044 row = temp_indices_to_constraint_B[temp_indices[total_counts]+j]; 2045 if (!PetscBTLookup(is_primal,row)) { 2046 col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2047 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 2048 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr); 2049 } else { 2050 for (k=0;k<size_of_constraint;k++) { 2051 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k]; 2052 if (row != col) { 2053 val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]; 2054 } else { 2055 val = 1.0; 2056 } 2057 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 2058 } 2059 } 2060 } 2061 } 2062 /* increment primal counter */ 2063 primal_counter += primal_dofs; 2064 } else { 2065 if (pcbddc->dbg_flag) { 2066 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr); 2067 } 2068 } 2069 /* increment constraint counter total_counts */ 2070 total_counts += primal_dofs; 2071 } 2072 2073 /* free workspace */ 2074 if (qr_needed) { 2075 if (pcbddc->dbg_flag) { 2076 ierr = PetscFree(work);CHKERRQ(ierr); 2077 } 2078 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 2079 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 2080 ierr = PetscFree(qr_work);CHKERRQ(ierr); 2081 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 2082 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 2083 } 2084 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 2085 /* assembling */ 2086 ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 /* 2089 ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 2090 ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr); 2091 */ 2092 } 2093 2094 /* get indices in local ordering for vertices and constraints */ 2095 if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */ 2096 ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr); 2097 ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr); 2098 } 2099 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2100 ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 2101 ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr); 2102 ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr); 2103 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2104 ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr); 2105 ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr); 2106 ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr); 2107 /* check if a new primal space has been introduced */ 2108 pcbddc->new_primal_space_local = PETSC_TRUE; 2109 if (olocal_primal_size == pcbddc->local_primal_size) { 2110 ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr); 2111 pcbddc->new_primal_space_local = !pcbddc->new_primal_space_local; 2112 ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr); 2113 } 2114 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 2115 ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2116 2117 /* flush dbg viewer */ 2118 if (pcbddc->dbg_flag) { 2119 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2120 } 2121 2122 /* free workspace */ 2123 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2124 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 2125 ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr); 2126 ierr = PetscFree(temp_indices);CHKERRQ(ierr); 2127 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 2128 ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr); 2129 ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr); 2130 ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "PCBDDCAnalyzeInterface" 2136 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 2137 { 2138 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2139 PC_IS *pcis = (PC_IS*)pc->data; 2140 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2141 PetscInt bs,ierr,i,vertex_size; 2142 PetscViewer viewer=pcbddc->dbg_viewer; 2143 2144 PetscFunctionBegin; 2145 /* Reset previously computed graph */ 2146 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 2147 /* Init local Graph struct */ 2148 ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr); 2149 2150 /* Check validity of the csr graph passed in by the user */ 2151 if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) { 2152 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 2153 } 2154 2155 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 2156 if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) { 2157 Mat mat_adj; 2158 const PetscInt *xadj,*adjncy; 2159 PetscBool flg_row=PETSC_TRUE; 2160 2161 ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr); 2162 ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2163 if (!flg_row) { 2164 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__); 2165 } 2166 ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 2167 ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 2168 if (!flg_row) { 2169 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__); 2170 } 2171 ierr = MatDestroy(&mat_adj);CHKERRQ(ierr); 2172 } 2173 2174 /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */ 2175 vertex_size = 1; 2176 if (!pcbddc->user_provided_isfordofs) { 2177 if (!pcbddc->n_ISForDofs) { 2178 IS *custom_ISForDofs; 2179 2180 ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr); 2181 ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr); 2182 for (i=0;i<bs;i++) { 2183 ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr); 2184 } 2185 ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr); 2186 pcbddc->user_provided_isfordofs = PETSC_FALSE; 2187 /* remove my references to IS objects */ 2188 for (i=0;i<bs;i++) { 2189 ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr); 2190 } 2191 ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr); 2192 } 2193 } else { /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */ 2194 ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr); 2195 } 2196 2197 /* Setup of Graph */ 2198 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices); 2199 2200 /* Graph's connected components analysis */ 2201 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 2202 2203 /* print some info to stdout */ 2204 if (pcbddc->dbg_flag) { 2205 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer); 2206 } 2207 2208 /* mark topography has done */ 2209 pcbddc->recompute_topography = PETSC_FALSE; 2210 PetscFunctionReturn(0); 2211 } 2212 2213 #undef __FUNCT__ 2214 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx" 2215 PetscErrorCode PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx) 2216 { 2217 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2218 PetscInt *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 n = 0; 2223 vertices = 0; 2224 if (pcbddc->ConstraintMatrix) { 2225 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr); 2226 for (i=0;i<local_primal_size;i++) { 2227 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2228 if (size_of_constraint == 1) n++; 2229 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2230 } 2231 if (vertices_idx) { 2232 ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr); 2233 n = 0; 2234 for (i=0;i<local_primal_size;i++) { 2235 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2236 if (size_of_constraint == 1) { 2237 vertices[n++]=row_cmat_indices[0]; 2238 } 2239 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2240 } 2241 } 2242 } 2243 *n_vertices = n; 2244 if (vertices_idx) *vertices_idx = vertices; 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx" 2250 PetscErrorCode PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx) 2251 { 2252 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 2253 PetscInt *constraints_index,*row_cmat_indices,*row_cmat_global_indices; 2254 PetscInt n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc; 2255 PetscBT touched; 2256 PetscErrorCode ierr; 2257 2258 /* This function assumes that the number of local constraints per connected component 2259 is not greater than the number of nodes defined for the connected component 2260 (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */ 2261 PetscFunctionBegin; 2262 n = 0; 2263 constraints_index = 0; 2264 if (pcbddc->ConstraintMatrix) { 2265 ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr); 2266 max_size_of_constraint = 0; 2267 for (i=0;i<local_primal_size;i++) { 2268 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2269 if (size_of_constraint > 1) { 2270 n++; 2271 } 2272 max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint); 2273 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr); 2274 } 2275 if (constraints_idx) { 2276 ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr); 2277 ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr); 2278 ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr); 2279 n = 0; 2280 for (i=0;i<local_primal_size;i++) { 2281 ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2282 if (size_of_constraint > 1) { 2283 ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr); 2284 /* find first untouched local node */ 2285 j = 0; 2286 while (PetscBTLookup(touched,row_cmat_indices[j])) j++; 2287 min_index = row_cmat_global_indices[j]; 2288 min_loc = j; 2289 /* search the minimum among nodes not yet touched on the connected component 2290 since there can be more than one constraint on a single cc */ 2291 for (j=1;j<size_of_constraint;j++) { 2292 if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) { 2293 min_index = row_cmat_global_indices[j]; 2294 min_loc = j; 2295 } 2296 } 2297 ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr); 2298 constraints_index[n++] = row_cmat_indices[min_loc]; 2299 } 2300 ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr); 2301 } 2302 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 2303 ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr); 2304 } 2305 } 2306 *n_constraints = n; 2307 if (constraints_idx) *constraints_idx = constraints_index; 2308 PetscFunctionReturn(0); 2309 } 2310 2311 /* the next two functions has been adapted from pcis.c */ 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "PCBDDCApplySchur" 2314 PetscErrorCode PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2315 { 2316 PetscErrorCode ierr; 2317 PC_IS *pcis = (PC_IS*)(pc->data); 2318 2319 PetscFunctionBegin; 2320 if (!vec2_B) { vec2_B = v; } 2321 ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2322 ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr); 2323 ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2324 ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr); 2325 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 #undef __FUNCT__ 2330 #define __FUNCT__ "PCBDDCApplySchurTranspose" 2331 PetscErrorCode PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D) 2332 { 2333 PetscErrorCode ierr; 2334 PC_IS *pcis = (PC_IS*)(pc->data); 2335 2336 PetscFunctionBegin; 2337 if (!vec2_B) { vec2_B = v; } 2338 ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr); 2339 ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr); 2340 ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr); 2341 ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr); 2342 ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr); 2343 PetscFunctionReturn(0); 2344 } 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "PCBDDCSubsetNumbering" 2348 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[]) 2349 { 2350 Vec local_vec,global_vec; 2351 IS seqis,paris; 2352 VecScatter scatter_ctx; 2353 PetscScalar *array; 2354 PetscInt *temp_global_dofs; 2355 PetscScalar globalsum; 2356 PetscInt i,j,s; 2357 PetscInt nlocals,first_index,old_index,max_local; 2358 PetscMPIInt rank_prec_comm,size_prec_comm,max_global; 2359 PetscMPIInt *dof_sizes,*dof_displs; 2360 PetscBool first_found; 2361 PetscErrorCode ierr; 2362 2363 PetscFunctionBegin; 2364 /* mpi buffers */ 2365 MPI_Comm_size(comm,&size_prec_comm); 2366 MPI_Comm_rank(comm,&rank_prec_comm); 2367 j = ( !rank_prec_comm ? size_prec_comm : 0); 2368 ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr); 2369 ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr); 2370 /* get maximum size of subset */ 2371 ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr); 2372 ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr); 2373 max_local = 0; 2374 if (n_local_dofs) { 2375 max_local = temp_global_dofs[0]; 2376 for (i=1;i<n_local_dofs;i++) { 2377 if (max_local < temp_global_dofs[i] ) { 2378 max_local = temp_global_dofs[i]; 2379 } 2380 } 2381 } 2382 ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm); 2383 max_global++; 2384 max_local = 0; 2385 if (n_local_dofs) { 2386 max_local = local_dofs[0]; 2387 for (i=1;i<n_local_dofs;i++) { 2388 if (max_local < local_dofs[i] ) { 2389 max_local = local_dofs[i]; 2390 } 2391 } 2392 } 2393 max_local++; 2394 /* allocate workspace */ 2395 ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr); 2396 ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr); 2397 ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr); 2398 ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr); 2399 ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr); 2400 ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr); 2401 /* create scatter */ 2402 ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr); 2403 ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr); 2404 ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr); 2405 ierr = ISDestroy(&seqis);CHKERRQ(ierr); 2406 ierr = ISDestroy(&paris);CHKERRQ(ierr); 2407 /* init array */ 2408 ierr = VecSet(global_vec,0.0);CHKERRQ(ierr); 2409 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2410 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2411 if (local_dofs_mult) { 2412 for (i=0;i<n_local_dofs;i++) { 2413 array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i]; 2414 } 2415 } else { 2416 for (i=0;i<n_local_dofs;i++) { 2417 array[local_dofs[i]]=1.0; 2418 } 2419 } 2420 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2421 /* scatter into global vec and get total number of global dofs */ 2422 ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2423 ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2424 ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr); 2425 *n_global_subset = (PetscInt)PetscRealPart(globalsum); 2426 /* Fill global_vec with cumulative function for global numbering */ 2427 ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr); 2428 ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr); 2429 nlocals = 0; 2430 first_index = -1; 2431 first_found = PETSC_FALSE; 2432 for (i=0;i<s;i++) { 2433 if (!first_found && PetscRealPart(array[i]) > 0.0) { 2434 first_found = PETSC_TRUE; 2435 first_index = i; 2436 } 2437 nlocals += (PetscInt)PetscRealPart(array[i]); 2438 } 2439 ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2440 if (!rank_prec_comm) { 2441 dof_displs[0]=0; 2442 for (i=1;i<size_prec_comm;i++) { 2443 dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1]; 2444 } 2445 } 2446 ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr); 2447 if (first_found) { 2448 array[first_index] += (PetscScalar)nlocals; 2449 old_index = first_index; 2450 for (i=first_index+1;i<s;i++) { 2451 if (PetscRealPart(array[i]) > 0.0) { 2452 array[i] += array[old_index]; 2453 old_index = i; 2454 } 2455 } 2456 } 2457 ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr); 2458 ierr = VecSet(local_vec,0.0);CHKERRQ(ierr); 2459 ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2460 ierr = VecScatterEnd (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2461 /* get global ordering of local dofs */ 2462 ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr); 2463 if (local_dofs_mult) { 2464 for (i=0;i<n_local_dofs;i++) { 2465 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i]; 2466 } 2467 } else { 2468 for (i=0;i<n_local_dofs;i++) { 2469 temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1; 2470 } 2471 } 2472 ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr); 2473 /* free workspace */ 2474 ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr); 2475 ierr = VecDestroy(&local_vec);CHKERRQ(ierr); 2476 ierr = VecDestroy(&global_vec);CHKERRQ(ierr); 2477 ierr = PetscFree(dof_sizes);CHKERRQ(ierr); 2478 ierr = PetscFree(dof_displs);CHKERRQ(ierr); 2479 /* return pointer to global ordering of local dofs */ 2480 *global_numbering_subset = temp_global_dofs; 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "PCBDDCOrthonormalizeVecs" 2486 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 2487 { 2488 PetscInt i,j; 2489 PetscScalar *alphas; 2490 PetscErrorCode ierr; 2491 2492 PetscFunctionBegin; 2493 /* this implements stabilized Gram-Schmidt */ 2494 ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr); 2495 for (i=0;i<n;i++) { 2496 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 2497 if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); } 2498 for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); } 2499 } 2500 ierr = PetscFree(alphas);CHKERRQ(ierr); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 /* TODO 2505 - now preallocation is done assuming SEQDENSE local matrices 2506 */ 2507 #undef __FUNCT__ 2508 #define __FUNCT__ "MatISGetMPIXAIJ" 2509 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M) 2510 { 2511 Mat new_mat; 2512 Mat_IS *matis = (Mat_IS*)(mat->data); 2513 /* info on mat */ 2514 /* ISLocalToGlobalMapping rmapping,cmapping; */ 2515 PetscInt bs,rows,cols; 2516 PetscInt lrows,lcols; 2517 PetscInt local_rows,local_cols; 2518 PetscBool isdense; 2519 /* values insertion */ 2520 PetscScalar *array; 2521 PetscInt *local_indices,*global_indices; 2522 /* work */ 2523 PetscInt i,j,index_row; 2524 PetscErrorCode ierr; 2525 2526 PetscFunctionBegin; 2527 /* MISSING CHECKS 2528 - rectangular case not covered (it is not allowed by MATIS) 2529 */ 2530 /* get info from mat */ 2531 /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */ 2532 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2533 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 2534 ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr); 2535 2536 /* work */ 2537 ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr); 2538 for (i=0;i<local_rows;i++) local_indices[i]=i; 2539 /* map indices of local mat to global values */ 2540 ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr); 2541 /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */ 2542 ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); 2543 2544 if (reuse==MAT_INITIAL_MATRIX) { 2545 Vec vec_dnz,vec_onz; 2546 PetscScalar *my_dnz,*my_onz; 2547 PetscInt *dnz,*onz,*mat_ranges,*row_ownership; 2548 PetscInt index_col,owner; 2549 PetscMPIInt nsubdomains; 2550 2551 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr); 2552 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr); 2553 ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr); 2554 ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr); 2555 ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr); 2556 ierr = MatSetUp(new_mat);CHKERRQ(ierr); 2557 ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr); 2558 2559 /* 2560 preallocation 2561 */ 2562 2563 ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr); 2564 /* 2565 Some vectors are needed to sum up properly on shared interface dofs. 2566 Preallocation macros cannot do the job. 2567 Note that preallocation is not exact, since it overestimates nonzeros 2568 */ 2569 ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr); 2570 /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */ 2571 ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr); 2572 ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr); 2573 /* All processes need to compute entire row ownership */ 2574 ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr); 2575 ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr); 2576 for (i=0;i<nsubdomains;i++) { 2577 for (j=mat_ranges[i];j<mat_ranges[i+1];j++) { 2578 row_ownership[j]=i; 2579 } 2580 } 2581 2582 /* 2583 my_dnz and my_onz contains exact contribution to preallocation from each local mat 2584 then, they will be summed up properly. This way, preallocation is always sufficient 2585 */ 2586 ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr); 2587 ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr); 2588 ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr); 2589 ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr); 2590 for (i=0;i<local_rows;i++) { 2591 index_row = global_indices[i]; 2592 for (j=i;j<local_rows;j++) { 2593 owner = row_ownership[index_row]; 2594 index_col = global_indices[j]; 2595 if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */ 2596 my_dnz[i] += 1.0; 2597 } else { /* offdiag block */ 2598 my_onz[i] += 1.0; 2599 } 2600 /* same as before, interchanging rows and cols */ 2601 if (i != j) { 2602 owner = row_ownership[index_col]; 2603 if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) { 2604 my_dnz[j] += 1.0; 2605 } else { 2606 my_onz[j] += 1.0; 2607 } 2608 } 2609 } 2610 } 2611 ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr); 2612 ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr); 2613 if (local_rows) { /* multilevel guard */ 2614 ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr); 2615 ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr); 2616 } 2617 ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr); 2618 ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr); 2619 ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr); 2620 ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr); 2621 ierr = PetscFree(my_dnz);CHKERRQ(ierr); 2622 ierr = PetscFree(my_onz);CHKERRQ(ierr); 2623 ierr = PetscFree(row_ownership);CHKERRQ(ierr); 2624 2625 /* set computed preallocation in dnz and onz */ 2626 ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr); 2627 for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]); 2628 ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr); 2629 ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr); 2630 for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]); 2631 ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr); 2632 ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr); 2633 ierr = VecDestroy(&vec_onz);CHKERRQ(ierr); 2634 2635 /* Resize preallocation if overestimated */ 2636 for (i=0;i<lrows;i++) { 2637 dnz[i] = PetscMin(dnz[i],lcols); 2638 onz[i] = PetscMin(onz[i],cols-lcols); 2639 } 2640 /* set preallocation */ 2641 ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr); 2642 for (i=0;i<lrows/bs;i++) { 2643 dnz[i] = dnz[i*bs]/bs; 2644 onz[i] = onz[i*bs]/bs; 2645 } 2646 ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2647 for (i=0;i<lrows/bs;i++) { 2648 dnz[i] = dnz[i]-i; 2649 } 2650 ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr); 2651 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2652 *M = new_mat; 2653 } else { 2654 PetscInt mbs,mrows,mcols; 2655 /* some checks */ 2656 ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr); 2657 ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr); 2658 if (mrows != rows) { 2659 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows); 2660 } 2661 if (mrows != rows) { 2662 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols); 2663 } 2664 if (mbs != bs) { 2665 SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs); 2666 } 2667 ierr = MatZeroEntries(*M);CHKERRQ(ierr); 2668 } 2669 /* set local to global mappings */ 2670 /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */ 2671 /* Set values */ 2672 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2673 if (isdense) { /* special case for dense local matrices */ 2674 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2675 ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr); 2676 ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2677 ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr); 2678 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2679 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2680 } else { /* very basic values insertion for all other matrix types */ 2681 ierr = PetscFree(local_indices);CHKERRQ(ierr); 2682 ierr = PetscFree(global_indices);CHKERRQ(ierr); 2683 for (i=0;i<local_rows;i++) { 2684 ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2685 /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */ 2686 ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr); 2687 ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr); 2688 ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr); 2689 ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr); 2690 } 2691 } 2692 ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2693 ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2694 if (isdense) { 2695 ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 2696 } 2697 PetscFunctionReturn(0); 2698 } 2699 2700 #undef __FUNCT__ 2701 #define __FUNCT__ "MatISSubassemble_Private" 2702 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends) 2703 { 2704 Mat subdomain_adj; 2705 IS new_ranks,ranks_send_to; 2706 MatPartitioning partitioner; 2707 Mat_IS *matis; 2708 PetscInt n_neighs,*neighs,*n_shared,**shared; 2709 PetscInt prank; 2710 PetscMPIInt size,rank,color; 2711 PetscInt *xadj,*adjncy,*oldranks; 2712 PetscInt *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx; 2713 PetscInt i,j,n_subdomains,local_size,threshold=0; 2714 PetscErrorCode ierr; 2715 PetscBool use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE; 2716 PetscSubcomm subcomm; 2717 2718 PetscFunctionBegin; 2719 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr); 2720 ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 2721 ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 2722 2723 /* Get info on mapping */ 2724 matis = (Mat_IS*)(mat->data); 2725 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr); 2726 ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2727 2728 /* build local CSR graph of subdomains' connectivity */ 2729 ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr); 2730 xadj[0] = 0; 2731 xadj[1] = PetscMax(n_neighs-1,0); 2732 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr); 2733 ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr); 2734 2735 if (threshold) { 2736 PetscInt* count,min_threshold; 2737 ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr); 2738 ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr); 2739 for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */ 2740 for (j=0;j<n_shared[i];j++) { 2741 count[shared[i][j]] += 1; 2742 } 2743 } 2744 /* adapt threshold since we dont want to lose significant connections */ 2745 min_threshold = n_neighs; 2746 for (i=1;i<n_neighs;i++) { 2747 for (j=0;j<n_shared[i];j++) { 2748 min_threshold = PetscMin(count[shared[i][j]],min_threshold); 2749 } 2750 } 2751 threshold = PetscMax(min_threshold+1,threshold); 2752 xadj[1] = 0; 2753 for (i=1;i<n_neighs;i++) { 2754 for (j=0;j<n_shared[i];j++) { 2755 if (count[shared[i][j]] < threshold) { 2756 adjncy[xadj[1]] = neighs[i]; 2757 adjncy_wgt[xadj[1]] = n_shared[i]; 2758 xadj[1]++; 2759 break; 2760 } 2761 } 2762 } 2763 ierr = PetscFree(count);CHKERRQ(ierr); 2764 } else { 2765 if (xadj[1]) { 2766 ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr); 2767 ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr); 2768 } 2769 } 2770 ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 2771 if (use_square) { 2772 for (i=0;i<xadj[1];i++) { 2773 adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i]; 2774 } 2775 } 2776 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2777 2778 ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr); 2779 2780 /* 2781 Restrict work on active processes only. 2782 */ 2783 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr); 2784 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 2785 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 2786 ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr); 2787 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 2788 if (color) { 2789 ierr = PetscFree(xadj);CHKERRQ(ierr); 2790 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2791 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 2792 } else { 2793 ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr); 2794 ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr); 2795 prank = rank; 2796 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr); 2797 for (i=0;i<size;i++) { 2798 PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]); 2799 } 2800 for (i=0;i<xadj[1];i++) { 2801 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 2802 } 2803 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 2804 ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 2805 n_subdomains = (PetscInt)size; 2806 ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); 2807 2808 /* Partition */ 2809 ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr); 2810 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 2811 if (use_vwgt) { 2812 ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr); 2813 v_wgt[0] = local_size; 2814 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 2815 } 2816 ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2817 ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr); 2818 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 2819 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 2820 ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); 2821 2822 ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2823 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 2824 ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr); 2825 /* clean up */ 2826 ierr = PetscFree(oldranks);CHKERRQ(ierr); 2827 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 2828 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 2829 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 2830 } 2831 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 2832 2833 /* assemble parallel IS for sends */ 2834 i = 1; 2835 if (color) i=0; 2836 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr); 2837 ierr = ISView(ranks_send_to,0);CHKERRQ(ierr); 2838 2839 /* get back IS */ 2840 *is_sends = ranks_send_to; 2841 PetscFunctionReturn(0); 2842 } 2843 2844 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatISSubassemble" 2848 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n) 2849 { 2850 Mat local_mat,new_mat; 2851 Mat_IS *matis; 2852 IS is_sends_internal; 2853 PetscInt rows,cols; 2854 PetscInt i,bs,buf_size_idxs,buf_size_vals; 2855 PetscBool ismatis,isdense; 2856 ISLocalToGlobalMapping l2gmap; 2857 PetscInt* l2gmap_indices; 2858 const PetscInt* is_indices; 2859 MatType new_local_type; 2860 MatTypePrivate new_local_type_private; 2861 /* buffers */ 2862 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 2863 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 2864 /* MPI */ 2865 MPI_Comm comm; 2866 PetscMPIInt n_sends,n_recvs,commsize; 2867 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals; 2868 PetscMPIInt *onodes,*olengths_idxs,*olengths_vals; 2869 PetscMPIInt len,tag_idxs,tag_vals,source_dest; 2870 MPI_Request *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals; 2871 PetscErrorCode ierr; 2872 2873 PetscFunctionBegin; 2874 /* checks */ 2875 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 2876 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__); 2877 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 2878 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 2879 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 2880 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 2881 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 2882 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 2883 PetscValidLogicalCollectiveInt(mat,bs,0); 2884 /* prepare IS for sending if not provided */ 2885 if (!is_sends) { 2886 if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio"); 2887 ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr); 2888 } else { 2889 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 2890 is_sends_internal = is_sends; 2891 } 2892 2893 /* get pointer of MATIS data */ 2894 matis = (Mat_IS*)mat->data; 2895 2896 /* get comm */ 2897 comm = PetscObjectComm((PetscObject)mat); 2898 2899 /* compute number of sends */ 2900 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 2901 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 2902 2903 /* compute number of receives */ 2904 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 2905 ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr); 2906 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 2907 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2908 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 2909 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 2910 ierr = PetscFree(iflags);CHKERRQ(ierr); 2911 2912 /* prepare send/receive buffers */ 2913 ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr); 2914 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 2915 ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr); 2916 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 2917 2918 /* Get data from local mat */ 2919 if (!isdense) { 2920 /* TODO: See below some guidelines on how to prepare the local buffers */ 2921 /* 2922 send_buffer_vals should contain the raw values of the local matrix 2923 send_buffer_idxs should contain: 2924 - MatType_PRIVATE type 2925 - PetscInt size_of_l2gmap 2926 - PetscInt global_row_indices[size_of_l2gmap] 2927 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 2928 */ 2929 } else { 2930 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 2931 ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr); 2932 ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr); 2933 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 2934 send_buffer_idxs[1] = i; 2935 ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2936 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 2937 ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 2938 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 2939 for (i=0;i<n_sends;i++) { 2940 ilengths_vals[is_indices[i]] = len*len; 2941 ilengths_idxs[is_indices[i]] = len+2; 2942 } 2943 } 2944 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 2945 buf_size_idxs = 0; 2946 buf_size_vals = 0; 2947 for (i=0;i<n_recvs;i++) { 2948 buf_size_idxs += (PetscInt)olengths_idxs[i]; 2949 buf_size_vals += (PetscInt)olengths_vals[i]; 2950 } 2951 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr); 2952 ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr); 2953 2954 /* get new tags for clean communications */ 2955 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 2956 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 2957 2958 /* allocate for requests */ 2959 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr); 2960 ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr); 2961 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr); 2962 ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr); 2963 2964 /* communications */ 2965 ptr_idxs = recv_buffer_idxs; 2966 ptr_vals = recv_buffer_vals; 2967 for (i=0;i<n_recvs;i++) { 2968 source_dest = onodes[i]; 2969 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 2970 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 2971 ptr_idxs += olengths_idxs[i]; 2972 ptr_vals += olengths_vals[i]; 2973 } 2974 for (i=0;i<n_sends;i++) { 2975 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 2976 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 2977 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 2978 } 2979 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 2980 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 2981 2982 /* assemble new l2g map */ 2983 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2984 ptr_idxs = recv_buffer_idxs; 2985 buf_size_idxs = 0; 2986 for (i=0;i<n_recvs;i++) { 2987 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2988 ptr_idxs += olengths_idxs[i]; 2989 } 2990 ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr); 2991 ptr_idxs = recv_buffer_idxs; 2992 buf_size_idxs = 0; 2993 for (i=0;i<n_recvs;i++) { 2994 ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 2995 buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 2996 ptr_idxs += olengths_idxs[i]; 2997 } 2998 ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr); 2999 ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 3000 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 3001 3002 /* infer new local matrix type from received local matrices type */ 3003 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 3004 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 3005 new_local_type_private = MATAIJ_PRIVATE; 3006 new_local_type = MATSEQAIJ; 3007 if (n_recvs) { 3008 new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 3009 ptr_idxs = recv_buffer_idxs; 3010 for (i=0;i<n_recvs;i++) { 3011 if ((PetscInt)new_local_type_private != *ptr_idxs) { 3012 new_local_type_private = MATAIJ_PRIVATE; 3013 break; 3014 } 3015 ptr_idxs += olengths_idxs[i]; 3016 } 3017 switch (new_local_type_private) { 3018 case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */ 3019 new_local_type = MATSEQAIJ; 3020 bs = 1; 3021 break; 3022 case MATAIJ_PRIVATE: 3023 new_local_type = MATSEQAIJ; 3024 bs = 1; 3025 break; 3026 case MATBAIJ_PRIVATE: 3027 new_local_type = MATSEQBAIJ; 3028 break; 3029 case MATSBAIJ_PRIVATE: 3030 new_local_type = MATSEQSBAIJ; 3031 break; 3032 default: 3033 SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__); 3034 break; 3035 } 3036 } 3037 3038 /* create MATIS object */ 3039 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3040 ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr); 3041 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 3042 ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr); 3043 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 3044 ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */ 3045 3046 /* set values */ 3047 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3048 ptr_vals = recv_buffer_vals; 3049 ptr_idxs = recv_buffer_idxs; 3050 for (i=0;i<n_recvs;i++) { 3051 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 3052 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3053 ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 3054 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3055 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 3056 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3057 } 3058 ptr_idxs += olengths_idxs[i]; 3059 ptr_vals += olengths_vals[i]; 3060 } 3061 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3062 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3063 ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3064 ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3065 3066 { /* check */ 3067 Vec lvec,rvec; 3068 PetscReal infty_error; 3069 3070 ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 3071 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 3072 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 3073 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 3074 ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr); 3075 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3076 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 3077 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 3078 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 3079 } 3080 3081 /* free workspace */ 3082 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 3083 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 3084 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3085 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 3086 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 3087 if (isdense) { 3088 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 3089 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 3090 } else { 3091 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 3092 } 3093 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 3094 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 3095 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 3096 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 3097 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 3098 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 3099 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 3100 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 3101 ierr = PetscFree(onodes);CHKERRQ(ierr); 3102 /* get back new mat */ 3103 *mat_n = new_mat; 3104 PetscFunctionReturn(0); 3105 } 3106 3107 #undef __FUNCT__ 3108 #define __FUNCT__ "PCBDDCSetUpCoarseSolver" 3109 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 3110 { 3111 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3112 PC_IS *pcis = (PC_IS*)pc->data; 3113 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 3114 MatNullSpace CoarseNullSpace=NULL; 3115 ISLocalToGlobalMapping coarse_islg; 3116 IS coarse_is; 3117 PetscInt max_it; 3118 PetscInt im_active=-1,active_procs=-1; 3119 PC pc_temp; 3120 PCType coarse_pc_type; 3121 KSPType coarse_ksp_type; 3122 PetscBool multilevel_requested,multilevel_allowed; 3123 PetscBool setsym,issym,isbddc,isnn,coarse_reuse; 3124 MatStructure matstruct; 3125 PetscErrorCode ierr; 3126 3127 PetscFunctionBegin; 3128 /* Assign global numbering to coarse dofs */ 3129 if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */ 3130 PetscInt ocoarse_size; 3131 ocoarse_size = pcbddc->coarse_size; 3132 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3133 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 3134 /* see if we can avoid some work */ 3135 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 3136 if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */ 3137 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3138 coarse_reuse = PETSC_FALSE; 3139 } else { /* we can safely reuse already computed coarse matrix */ 3140 coarse_reuse = PETSC_TRUE; 3141 } 3142 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 3143 coarse_reuse = PETSC_FALSE; 3144 } 3145 } else { /* primal space has not been changed, so we can reuse coarse matrix */ 3146 coarse_reuse = PETSC_TRUE; 3147 } 3148 3149 /* infer some info from user */ 3150 issym = PETSC_FALSE; 3151 ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr); 3152 multilevel_allowed = PETSC_FALSE; 3153 multilevel_requested = PETSC_FALSE; 3154 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 3155 if (multilevel_requested) { 3156 /* count "active processes" */ 3157 im_active = !!(pcis->n); 3158 ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3159 if (active_procs/pcbddc->coarsening_ratio < 2) { 3160 multilevel_allowed = PETSC_FALSE; 3161 } else { 3162 multilevel_allowed = PETSC_TRUE; 3163 } 3164 } 3165 3166 /* set defaults for coarse KSP and PC */ 3167 if (multilevel_allowed) { 3168 if (issym) { 3169 coarse_ksp_type = KSPRICHARDSON; 3170 } else { 3171 coarse_ksp_type = KSPCHEBYSHEV; 3172 } 3173 coarse_pc_type = PCBDDC; 3174 } else { 3175 coarse_ksp_type = KSPPREONLY; 3176 coarse_pc_type = PCREDUNDANT; 3177 } 3178 3179 /* create the coarse KSP object only once with defaults */ 3180 if (!pcbddc->coarse_ksp) { 3181 char prefix[256],str_level[3]; 3182 size_t len; 3183 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr); 3184 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 3185 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 3186 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3187 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3188 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3189 /* prefix */ 3190 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 3191 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 3192 if (!pcbddc->current_level) { 3193 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 3194 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 3195 } else { 3196 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 3197 if (pcbddc->current_level>1) len -= 2; 3198 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr); 3199 *(prefix+len)='\0'; 3200 sprintf(str_level,"%d_",(int)(pcbddc->current_level)); 3201 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 3202 } 3203 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 3204 } 3205 /* allow user customization */ 3206 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3207 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 3208 /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */ 3209 3210 /* get some info after set from options */ 3211 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 3212 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 3213 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 3214 if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */ 3215 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 3216 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 3217 isbddc = PETSC_FALSE; 3218 } 3219 3220 /* propagate BDDC info to the next level */ 3221 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 3222 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3223 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 3224 3225 /* creates temporary MATIS object for coarse matrix */ 3226 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 3227 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 3228 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr); 3229 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr); 3230 ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 3231 ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3232 ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3233 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 3234 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 3235 3236 /* assemble coarse matrix */ 3237 if (isbddc || isnn) { 3238 ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr); 3239 } else { 3240 if (coarse_reuse) { 3241 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr); 3242 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 3243 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr); 3244 } else { 3245 ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr); 3246 } 3247 } 3248 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 3249 3250 /* create local to global scatters for coarse problem */ 3251 if (pcbddc->new_primal_space) { 3252 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3253 ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr); 3254 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3255 ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr); 3256 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3257 } 3258 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 3259 3260 /* propagate symmetry info to coarse matrix */ 3261 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr); 3262 3263 /* Compute coarse null space (special handling by BDDC only) */ 3264 if (pcbddc->NullSpace) { 3265 ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr); 3266 if (isbddc) { 3267 ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr); 3268 } else { 3269 ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr); 3270 } 3271 } 3272 3273 /* set operators */ 3274 ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr); 3275 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr); 3276 3277 /* additional KSP customization */ 3278 ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr); 3279 if (max_it < 5) { 3280 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 3281 } 3282 /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */ 3283 3284 3285 /* print some info if requested */ 3286 if (pcbddc->dbg_flag) { 3287 ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr); 3288 ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr); 3289 if (!multilevel_allowed) { 3290 if (multilevel_requested) { 3291 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 3292 } else if (pcbddc->max_levels) { 3293 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 3294 } 3295 } 3296 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 3297 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3298 } 3299 3300 /* setup coarse ksp */ 3301 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 3302 if (pcbddc->dbg_flag) { 3303 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 3304 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3305 ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr); 3306 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3307 } 3308 3309 /* Check coarse problem if requested */ 3310 if (pcbddc->dbg_flag) { 3311 KSP check_ksp; 3312 KSPType check_ksp_type; 3313 PC check_pc; 3314 Vec check_vec; 3315 PetscReal abs_infty_error,infty_error,lambda_min,lambda_max; 3316 PetscInt its; 3317 PetscBool ispreonly,compute; 3318 3319 /* Create ksp object suitable for estimation of extreme eigenvalues */ 3320 ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr); 3321 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr); 3322 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 3323 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 3324 if (ispreonly) { 3325 check_ksp_type = KSPPREONLY; 3326 compute = PETSC_FALSE; 3327 } else { 3328 if (issym) check_ksp_type = KSPCG; 3329 else check_ksp_type = KSPGMRES; 3330 compute = PETSC_TRUE; 3331 } 3332 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 3333 ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr); 3334 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 3335 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 3336 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 3337 /* create random vec */ 3338 ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr); 3339 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 3340 if (CoarseNullSpace) { 3341 ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr); 3342 } 3343 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3344 /* solve coarse problem */ 3345 ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr); 3346 if (CoarseNullSpace) { 3347 ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr); 3348 } 3349 /* check coarse problem residual error */ 3350 ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr); 3351 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 3352 ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr); 3353 ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 3354 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 3355 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr); 3356 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 3357 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 3358 /* get eigenvalue estimation if preonly has not been requested */ 3359 if (!ispreonly) { 3360 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr); 3361 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 3362 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr); 3363 } 3364 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3365 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 3366 } 3367 /* free memory */ 3368 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 3369 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 3370 PetscFunctionReturn(0); 3371 } 3372 3373 #undef __FUNCT__ 3374 #define __FUNCT__ "PCBDDCComputePrimalNumbering" 3375 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 3376 { 3377 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3378 PC_IS* pcis = (PC_IS*)pc->data; 3379 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 3380 PetscInt i,coarse_size; 3381 PetscInt *local_primal_indices; 3382 PetscErrorCode ierr; 3383 3384 PetscFunctionBegin; 3385 /* Compute global number of coarse dofs */ 3386 if (!pcbddc->primal_indices_local_idxs) { 3387 SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created"); 3388 } 3389 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr); 3390 3391 /* check numbering */ 3392 if (pcbddc->dbg_flag) { 3393 PetscScalar coarsesum,*array; 3394 3395 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3396 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3397 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 3398 ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr); 3399 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 3400 for (i=0;i<pcbddc->local_primal_size;i++) { 3401 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 3402 } 3403 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 3404 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 3405 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3406 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3407 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3408 ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3409 ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3410 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3411 for (i=0;i<pcis->n;i++) { 3412 if (array[i] == 1.0) { 3413 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr); 3414 } 3415 } 3416 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3417 for (i=0;i<pcis->n;i++) { 3418 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 3419 } 3420 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 3421 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 3422 ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3423 ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 3424 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 3425 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 3426 if (pcbddc->dbg_flag > 1) { 3427 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 3428 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3429 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 3430 for (i=0;i<pcbddc->local_primal_size;i++) { 3431 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]); 3432 } 3433 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3434 } 3435 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3436 } 3437 /* get back data */ 3438 *coarse_size_n = coarse_size; 3439 *local_primal_indices_n = local_primal_indices; 3440 PetscFunctionReturn(0); 3441 } 3442 3443