1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 4 static PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*); 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCSubSchursSetUpNew" 8 PetscErrorCode PCBDDCSubSchursSetUpNew(PCBDDCSubSchurs sub_schurs, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers) 9 { 10 Mat A_II,A_IB,A_BI,A_BB; 11 Mat AE_II,*AE_IE,*AE_EI,*AE_EE; 12 Mat S_all,global_schur_subsets,work_mat; 13 ISLocalToGlobalMapping l2gmap_subsets; 14 IS is_I,*is_subset_B,temp_is; 15 PetscInt *nnz,*all_local_idx_G,*all_local_idx_B,*all_local_idx_N; 16 PetscInt i,subset_size,max_subset_size; 17 PetscInt extra,local_size,global_size; 18 PetscErrorCode ierr; 19 20 PetscFunctionBegin; 21 22 /* allocate space for schur complements */ 23 ierr = PetscMalloc4(sub_schurs->n_subs,&sub_schurs->is_AEj_B, 24 sub_schurs->n_subs,&sub_schurs->S_Ej, 25 sub_schurs->n_subs,&sub_schurs->work1, 26 sub_schurs->n_subs,&sub_schurs->work2);CHKERRQ(ierr); 27 28 /* get Schur complement matrices */ 29 if (!sub_schurs->use_mumps) { 30 ierr = MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr); 31 ierr = PetscMalloc4(sub_schurs->n_subs,&is_subset_B, 32 sub_schurs->n_subs,&AE_IE, 33 sub_schurs->n_subs,&AE_EI, 34 sub_schurs->n_subs,&AE_EE);CHKERRQ(ierr); 35 } 36 37 /* determine interior problems */ 38 if (nlayers >= 0 && xadj != NULL && adjncy != NULL) { /* Interior problems can be different from the original one */ 39 PetscBT touched; 40 const PetscInt* idx_B; 41 PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 42 43 /* get sizes */ 44 ierr = ISGetLocalSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr); 45 ierr = ISGetLocalSize(sub_schurs->is_B,&n_B);CHKERRQ(ierr); 46 47 ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr); 48 ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr); 49 ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr); 50 51 /* all boundary dofs must be skipped when adding layers */ 52 ierr = ISGetIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr); 53 for (j=0;j<n_B;j++) { 54 ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr); 55 } 56 ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr); 57 ierr = ISRestoreIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr); 58 59 /* add prescribed number of layers of dofs */ 60 n_local_dofs = n_B; 61 n_prev_added = n_B; 62 for (layer=0;layer<nlayers;layer++) { 63 PetscInt n_added; 64 if (n_local_dofs == n_I+n_B) break; 65 if (n_local_dofs > n_I+n_B) { 66 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B); 67 } 68 ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr); 69 n_prev_added = n_added; 70 n_local_dofs += n_added; 71 if (!n_added) break; 72 } 73 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 74 75 /* IS for I layer dofs in original numbering */ 76 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_I_layer);CHKERRQ(ierr); 77 ierr = PetscFree(local_numbering);CHKERRQ(ierr); 78 ierr = ISSort(sub_schurs->is_I_layer);CHKERRQ(ierr); 79 /* IS for I layer dofs in I numbering */ 80 if (!sub_schurs->use_mumps) { 81 ISLocalToGlobalMapping ItoNmap; 82 ierr = ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);CHKERRQ(ierr); 83 ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_I_layer,&is_I);CHKERRQ(ierr); 84 ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr); 85 86 /* II block */ 87 ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr); 88 } 89 } else { 90 PetscInt n_I; 91 92 /* IS for I dofs in original numbering */ 93 ierr = PetscObjectReference((PetscObject)sub_schurs->is_I);CHKERRQ(ierr); 94 sub_schurs->is_I_layer = sub_schurs->is_I; 95 96 /* IS for I dofs in I numbering (strided 1) */ 97 if (!sub_schurs->use_mumps) { 98 ierr = ISGetSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr); 99 ierr = ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);CHKERRQ(ierr); 100 101 /* II block is the same */ 102 ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr); 103 AE_II = A_II; 104 } 105 } 106 107 for (i=0;i<sub_schurs->n_subs;i++) { 108 ierr = ISDuplicate(sub_schurs->is_subs[i],&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 109 ierr = ISSort(sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 110 } 111 112 /* Get info on subset sizes and sum of all subsets sizes */ 113 max_subset_size = 0; 114 local_size = 0; 115 for (i=0;i<sub_schurs->n_subs_seq;i++) { 116 PetscInt j = sub_schurs->index_sequential[i]; 117 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[j],&subset_size);CHKERRQ(ierr); 118 max_subset_size = PetscMax(subset_size,max_subset_size); 119 local_size += subset_size; 120 } 121 122 /* Work arrays for local indices */ 123 ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr); 124 extra = 0; 125 if (sub_schurs->use_mumps) { 126 ierr = ISGetLocalSize(sub_schurs->is_I_layer,&extra);CHKERRQ(ierr); 127 } 128 ierr = PetscMalloc1(local_size+extra,&all_local_idx_N);CHKERRQ(ierr); 129 if (extra) { 130 const PetscInt *idxs; 131 ierr = ISGetIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr); 132 ierr = PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));CHKERRQ(ierr); 133 ierr = ISRestoreIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr); 134 } 135 ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr); 136 137 /* Get local indices in local numbering */ 138 local_size = 0; 139 for (i=0;i<sub_schurs->n_subs_seq;i++) { 140 PetscInt j; 141 const PetscInt *idxs; 142 143 PetscInt local_problem_index = sub_schurs->index_sequential[i]; 144 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 145 ierr = ISGetIndices(sub_schurs->is_AEj_B[local_problem_index],&idxs);CHKERRQ(ierr); 146 /* subset indices in local numbering */ 147 ierr = PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 148 ierr = ISRestoreIndices(sub_schurs->is_AEj_B[local_problem_index],&idxs);CHKERRQ(ierr); 149 for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size; 150 local_size += subset_size; 151 } 152 153 S_all = NULL; 154 if (!sub_schurs->use_mumps) { 155 /* subsets in original and boundary numbering */ 156 for (i=0;i<sub_schurs->n_subs;i++) { 157 ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[i],&is_subset_B[i]);CHKERRQ(ierr); 158 } 159 160 /* EE block */ 161 for (i=0;i<sub_schurs->n_subs;i++) { 162 ierr = MatGetSubMatrix(A_BB,is_subset_B[i],is_subset_B[i],MAT_INITIAL_MATRIX,&AE_EE[i]);CHKERRQ(ierr); 163 } 164 /* IE block */ 165 for (i=0;i<sub_schurs->n_subs;i++) { 166 ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B[i],MAT_INITIAL_MATRIX,&AE_IE[i]);CHKERRQ(ierr); 167 } 168 /* EI block */ 169 for (i=0;i<sub_schurs->n_subs;i++) { 170 ierr = MatGetSubMatrix(A_BI,is_subset_B[i],is_I,MAT_INITIAL_MATRIX,&AE_EI[i]);CHKERRQ(ierr); 171 } 172 173 /* setup Schur complements on subset */ 174 for (i=0;i<sub_schurs->n_subs;i++) { 175 ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE[i],AE_EI[i],AE_EE[i],&sub_schurs->S_Ej[i]);CHKERRQ(ierr); 176 if (AE_II == A_II) { /* we can reuse the same ksp */ 177 KSP ksp; 178 ierr = MatSchurComplementGetKSP(sub_schurs->S,&ksp);CHKERRQ(ierr); 179 ierr = MatSchurComplementSetKSP(sub_schurs->S_Ej[i],ksp);CHKERRQ(ierr); 180 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 181 KSP origksp,schurksp; 182 PC origpc,schurpc; 183 KSPType ksp_type; 184 PCType pc_type; 185 PetscInt n_internal; 186 187 ierr = MatSchurComplementGetKSP(sub_schurs->S,&origksp);CHKERRQ(ierr); 188 ierr = MatSchurComplementGetKSP(sub_schurs->S_Ej[i],&schurksp);CHKERRQ(ierr); 189 ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr); 190 ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr); 191 ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr); 192 ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr); 193 ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr); 194 ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr); 195 ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr); 196 if (n_internal) { /* UMFPACK gives error with 0 sized problems */ 197 MatSolverPackage solver=NULL; 198 ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr); 199 if (solver) { 200 ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr); 201 } 202 } 203 ierr = KSPSetUp(schurksp);CHKERRQ(ierr); 204 } 205 } 206 /* free */ 207 ierr = ISDestroy(&is_I);CHKERRQ(ierr); 208 ierr = MatDestroy(&AE_II);CHKERRQ(ierr); 209 for (i=0;i<sub_schurs->n_subs;i++) { 210 ierr = MatDestroy(&AE_EE[i]);CHKERRQ(ierr); 211 ierr = MatDestroy(&AE_IE[i]);CHKERRQ(ierr); 212 ierr = MatDestroy(&AE_EI[i]);CHKERRQ(ierr); 213 ierr = ISDestroy(&is_subset_B[i]);CHKERRQ(ierr); 214 } 215 ierr = PetscFree4(is_subset_B,AE_IE,AE_EI,AE_EE);CHKERRQ(ierr); 216 #if defined(PETSC_HAVE_MUMPS) 217 } else { 218 Mat A,F; 219 IS is_A_all; 220 PetscBool is_symmetric; 221 PetscInt *idxs_schur,n_I; 222 223 /* get working mat */ 224 ierr = ISGetLocalSize(sub_schurs->is_I_layer,&n_I);CHKERRQ(ierr); 225 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);CHKERRQ(ierr); 226 ierr = MatGetSubMatrixUnsorted(sub_schurs->A,is_A_all,is_A_all,&A);CHKERRQ(ierr); 227 ierr = ISDestroy(&is_A_all);CHKERRQ(ierr); 228 229 ierr = MatIsSymmetric(sub_schurs->A,0.0,&is_symmetric);CHKERRQ(ierr); 230 if (is_symmetric) { 231 ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr); 232 } else { 233 ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr); 234 } 235 236 /* subsets ordered last */ 237 ierr = PetscMalloc1(local_size,&idxs_schur);CHKERRQ(ierr); 238 for (i=0;i<local_size;i++) { 239 idxs_schur[i] = n_I+i+1; 240 } 241 ierr = MatMumpsSetSchurIndices(F,local_size,idxs_schur);CHKERRQ(ierr); 242 ierr = PetscFree(idxs_schur);CHKERRQ(ierr); 243 244 /* factorization step */ 245 if (is_symmetric) { 246 ierr = MatCholeskyFactorSymbolic(F,A,NULL,NULL);CHKERRQ(ierr); 247 ierr = MatCholeskyFactorNumeric(F,A,NULL);CHKERRQ(ierr); 248 } else { 249 ierr = MatLUFactorSymbolic(F,A,NULL,NULL,NULL);CHKERRQ(ierr); 250 ierr = MatLUFactorNumeric(F,A,NULL);CHKERRQ(ierr); 251 } 252 253 /* get explicit Schur Complement computed during numeric factorization */ 254 ierr = MatMumpsGetSchurComplement(F,&S_all);CHKERRQ(ierr); 255 256 /* free workspace */ 257 ierr = MatDestroy(&A);CHKERRQ(ierr); 258 ierr = MatDestroy(&F);CHKERRQ(ierr); 259 260 /* unused */ 261 for (i=0;i<sub_schurs->n_subs;i++) { 262 sub_schurs->S_Ej[i] = 0; 263 } 264 #endif 265 } 266 267 /* TODO: just for compatibility with the previous version, needs to be fixed */ 268 for (i=0;i<sub_schurs->n_subs_par;i++) { 269 PetscInt j = sub_schurs->index_parallel[i]; 270 ierr = MatCreateVecs(sub_schurs->S_Ej[j],&sub_schurs->work1[j],&sub_schurs->work2[j]);CHKERRQ(ierr); 271 } 272 for (i=0;i<sub_schurs->n_subs_seq;i++) { 273 sub_schurs->work1[sub_schurs->index_sequential[i]] = 0; 274 sub_schurs->work2[sub_schurs->index_sequential[i]] = 0; 275 } 276 277 if (!sub_schurs->n_subs_seq_g) { 278 sub_schurs->S_Ej_all = 0; 279 sub_schurs->sum_S_Ej_all = 0; 280 sub_schurs->is_Ej_all = 0; 281 PetscFunctionReturn(0); 282 } 283 284 /* subset indices in local boundary numbering */ 285 ierr = ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);CHKERRQ(ierr); 286 if (subset_size != local_size) { 287 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size); 288 } 289 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);CHKERRQ(ierr); 290 291 /* Local matrix of all local Schur on subsets */ 292 ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr); 293 ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr); 294 ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr); 295 ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr); 296 ierr = PetscFree(nnz);CHKERRQ(ierr); 297 298 if (!sub_schurs->use_mumps) { 299 PetscScalar *fill_vals; 300 PetscInt *dummy_idx; 301 302 /* Work arrays */ 303 ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&fill_vals);CHKERRQ(ierr); 304 305 /* Loop on local problems to compute Schur complements explicitly (TODO; optimize)*/ 306 local_size = 0; 307 for (i=0;i<sub_schurs->n_subs_seq;i++) { 308 Vec work1,work2; 309 PetscInt j,local_problem_index = sub_schurs->index_sequential[i]; 310 311 ierr = MatCreateVecs(sub_schurs->S_Ej[local_problem_index],&work1,&work2);CHKERRQ(ierr); 312 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 313 /* local Schur */ 314 for (j=0;j<subset_size;j++) { 315 ierr = VecSet(work1,0.0);CHKERRQ(ierr); 316 ierr = VecSetValue(work1,j,1.0,INSERT_VALUES);CHKERRQ(ierr); 317 ierr = VecPlaceArray(work2,&fill_vals[j*subset_size]);CHKERRQ(ierr); 318 ierr = MatMult(sub_schurs->S_Ej[local_problem_index],work1,work2);CHKERRQ(ierr); 319 ierr = VecResetArray(work2);CHKERRQ(ierr); 320 } 321 for (j=0;j<subset_size;j++) { 322 dummy_idx[j]=local_size+j; 323 } 324 ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,fill_vals,INSERT_VALUES);CHKERRQ(ierr); 325 local_size += subset_size; 326 ierr = VecDestroy(&work1);CHKERRQ(ierr); 327 ierr = VecDestroy(&work2);CHKERRQ(ierr); 328 } 329 ierr = PetscFree2(dummy_idx,fill_vals);CHKERRQ(ierr); 330 } else { 331 PetscInt *dummy_idx,n_all; 332 PetscScalar *vals,*fill_vals; 333 334 /* Work arrays */ 335 ierr = PetscMalloc1(max_subset_size,&dummy_idx);CHKERRQ(ierr); 336 ierr = MatGetSize(S_all,&n_all,NULL);CHKERRQ(ierr); 337 ierr = MatDenseGetArray(S_all,&vals);CHKERRQ(ierr); 338 local_size = 0; 339 subset_size = 0; 340 fill_vals = vals; 341 for (i=0;i<sub_schurs->n_subs_seq;i++) { 342 PetscInt j,lpi = sub_schurs->index_sequential[i]; 343 344 fill_vals += subset_size; 345 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[lpi],&subset_size);CHKERRQ(ierr); 346 for (j=0;j<subset_size;j++) { 347 dummy_idx[j]=local_size+j; 348 } 349 for (j=0;j<subset_size;j++) { 350 ierr = MatSetValues(sub_schurs->S_Ej_all,1,dummy_idx+j,subset_size,dummy_idx,fill_vals,INSERT_VALUES);CHKERRQ(ierr); 351 fill_vals += n_all; 352 } 353 local_size += subset_size; 354 } 355 ierr = MatDenseRestoreArray(S_all,&vals);CHKERRQ(ierr); 356 ierr = PetscFree(dummy_idx);CHKERRQ(ierr); 357 } 358 ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 360 ierr = MatDestroy(&S_all);CHKERRQ(ierr); 361 362 /* Global matrix of all assembled Schur on subsets */ 363 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->l2gmap,local_size,all_local_idx_N+extra,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr); 364 ierr = ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),1,local_size,all_local_idx_G,PETSC_COPY_VALUES,&l2gmap_subsets);CHKERRQ(ierr); 365 ierr = MatCreateIS(PetscObjectComm((PetscObject)sub_schurs->l2gmap),1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);CHKERRQ(ierr); 366 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr); 367 ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr); 368 ierr = MatISGetMPIXAIJ(work_mat,MAT_INITIAL_MATRIX,&global_schur_subsets);CHKERRQ(ierr); 369 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 370 371 /* Get local part of (\sum_j S_Ej) */ 372 ierr = PetscFree(all_local_idx_N);CHKERRQ(ierr); 373 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->l2gmap),local_size,all_local_idx_G,PETSC_OWN_POINTER,&temp_is);CHKERRQ(ierr); 374 ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 375 ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr); 376 ierr = ISDestroy(&temp_is);CHKERRQ(ierr); 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "PCBDDCSubSchursInit" 382 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, Mat A, Mat S, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscInt seqthreshold) 383 { 384 IS *faces,*edges,*all_cc; 385 PetscInt *index_sequential,*index_parallel; 386 PetscInt *auxlocal_sequential,*auxlocal_parallel; 387 PetscInt *auxglobal_sequential,*auxglobal_parallel; 388 PetscInt *auxmapping;//,*idxs; 389 PetscInt i,max_subset_size; 390 PetscInt n_sequential_problems,n_local_sequential_problems,n_parallel_problems,n_local_parallel_problems; 391 PetscInt n_faces,n_edges,n_all_cc; 392 PetscBool is_sorted; 393 PetscErrorCode ierr; 394 395 PetscFunctionBegin; 396 ierr = ISSorted(is_I,&is_sorted);CHKERRQ(ierr); 397 if (!is_sorted) { 398 SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 399 } 400 ierr = ISSorted(is_B,&is_sorted);CHKERRQ(ierr); 401 if (!is_sorted) { 402 SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 403 } 404 405 /* reset any previous data */ 406 ierr = PCBDDCSubSchursReset(sub_schurs);CHKERRQ(ierr); 407 408 /* get index sets for faces and edges */ 409 ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,NULL);CHKERRQ(ierr); 410 n_all_cc = n_faces+n_edges; 411 ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr); 412 for (i=0;i<n_faces;i++) { 413 all_cc[i] = faces[i]; 414 } 415 for (i=0;i<n_edges;i++) { 416 all_cc[n_faces+i] = edges[i]; 417 } 418 ierr = PetscFree(faces);CHKERRQ(ierr); 419 ierr = PetscFree(edges);CHKERRQ(ierr); 420 421 /* map interface's subsets */ 422 max_subset_size = 0; 423 for (i=0;i<n_all_cc;i++) { 424 PetscInt subset_size; 425 ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr); 426 max_subset_size = PetscMax(max_subset_size,subset_size); 427 } 428 ierr = PetscMalloc1(max_subset_size,&auxmapping);CHKERRQ(ierr); 429 ierr = PetscMalloc2(graph->ncc,&auxlocal_sequential, 430 graph->ncc,&auxlocal_parallel);CHKERRQ(ierr); 431 ierr = PetscMalloc2(graph->ncc,&index_sequential, 432 graph->ncc,&index_parallel);CHKERRQ(ierr); 433 434 /* if threshold is negative uses all sequential problems (possibly using MUMPS) */ 435 sub_schurs->use_mumps = PETSC_FALSE; 436 if (seqthreshold < 0) { 437 seqthreshold = max_subset_size; 438 #if defined(PETSC_HAVE_MUMPS) 439 sub_schurs->use_mumps = !!A; 440 #endif 441 } 442 443 444 /* determine which problem has to be solved in parallel or sequentially */ 445 n_local_sequential_problems = 0; 446 n_local_parallel_problems = 0; 447 for (i=0;i<n_all_cc;i++) { 448 PetscInt subset_size,j,min_loc = 0; 449 const PetscInt *idxs; 450 451 ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr); 452 ierr = ISGetIndices(all_cc[i],&idxs);CHKERRQ(ierr); 453 ierr = ISLocalToGlobalMappingApply(graph->l2gmap,subset_size,idxs,auxmapping);CHKERRQ(ierr); 454 for (j=1;j<subset_size;j++) { 455 if (auxmapping[j]<auxmapping[min_loc]) { 456 min_loc = j; 457 } 458 } 459 if (subset_size > seqthreshold) { 460 index_parallel[n_local_parallel_problems] = i; 461 auxlocal_parallel[n_local_parallel_problems] = idxs[min_loc]; 462 n_local_parallel_problems++; 463 } else { 464 index_sequential[n_local_sequential_problems] = i; 465 auxlocal_sequential[n_local_sequential_problems] = idxs[min_loc]; 466 n_local_sequential_problems++; 467 } 468 ierr = ISRestoreIndices(all_cc[i],&idxs);CHKERRQ(ierr); 469 } 470 471 /* Number parallel problems */ 472 auxglobal_parallel = 0; 473 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)graph->l2gmap),graph->l2gmap,n_local_parallel_problems,auxlocal_parallel,PETSC_NULL,&n_parallel_problems,&auxglobal_parallel);CHKERRQ(ierr); 474 475 /* Number sequential problems */ 476 auxglobal_sequential = 0; 477 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)graph->l2gmap),graph->l2gmap,n_local_sequential_problems,auxlocal_sequential,PETSC_NULL,&n_sequential_problems,&auxglobal_sequential);CHKERRQ(ierr); 478 479 /* update info in sub_schurs */ 480 if (sub_schurs->use_mumps && A) { 481 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 482 sub_schurs->A = A; 483 } 484 ierr = PetscObjectReference((PetscObject)S);CHKERRQ(ierr); 485 sub_schurs->S = S; 486 ierr = PetscObjectReference((PetscObject)is_I);CHKERRQ(ierr); 487 sub_schurs->is_I = is_I; 488 ierr = PetscObjectReference((PetscObject)is_B);CHKERRQ(ierr); 489 sub_schurs->is_B = is_B; 490 ierr = PetscObjectReference((PetscObject)graph->l2gmap);CHKERRQ(ierr); 491 sub_schurs->l2gmap = graph->l2gmap; 492 ierr = PetscObjectReference((PetscObject)BtoNmap);CHKERRQ(ierr); 493 sub_schurs->BtoNmap = BtoNmap; 494 sub_schurs->n_subs_seq = n_local_sequential_problems; 495 sub_schurs->n_subs_par = n_local_parallel_problems; 496 sub_schurs->n_subs_seq_g = n_sequential_problems; 497 sub_schurs->n_subs_par_g = n_parallel_problems; 498 sub_schurs->n_subs = sub_schurs->n_subs_seq + sub_schurs->n_subs_par; 499 sub_schurs->is_subs = all_cc; 500 sub_schurs->index_sequential = index_sequential; 501 sub_schurs->index_parallel = index_parallel; 502 sub_schurs->auxglobal_sequential = auxglobal_sequential; 503 sub_schurs->auxglobal_parallel = auxglobal_parallel; 504 505 /* free workspace */ 506 ierr = PetscFree(auxmapping);CHKERRQ(ierr); 507 ierr = PetscFree2(auxlocal_sequential,auxlocal_parallel);CHKERRQ(ierr); 508 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNCT__ 513 #define __FUNCT__ "PCBDDCSubSchursCreate" 514 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 515 { 516 PCBDDCSubSchurs schurs_ctx; 517 PetscErrorCode ierr; 518 519 PetscFunctionBegin; 520 ierr = PetscNew(&schurs_ctx);CHKERRQ(ierr); 521 schurs_ctx->n_subs = 0; 522 *sub_schurs = schurs_ctx; 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "PCBDDCSubSchursDestroy" 528 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs) 529 { 530 PetscErrorCode ierr; 531 532 PetscFunctionBegin; 533 ierr = PCBDDCSubSchursReset(*sub_schurs);CHKERRQ(ierr); 534 ierr = PetscFree(*sub_schurs);CHKERRQ(ierr); 535 PetscFunctionReturn(0); 536 } 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "PCBDDCSubSchursReset" 540 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 541 { 542 PetscInt i; 543 PetscErrorCode ierr; 544 545 PetscFunctionBegin; 546 ierr = MatDestroy(&sub_schurs->A);CHKERRQ(ierr); 547 ierr = MatDestroy(&sub_schurs->S);CHKERRQ(ierr); 548 ierr = ISDestroy(&sub_schurs->is_I);CHKERRQ(ierr); 549 ierr = ISDestroy(&sub_schurs->is_B);CHKERRQ(ierr); 550 ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);CHKERRQ(ierr); 551 ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);CHKERRQ(ierr); 552 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 553 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 554 ierr = ISDestroy(&sub_schurs->is_Ej_all);CHKERRQ(ierr); 555 for (i=0;i<sub_schurs->n_subs;i++) { 556 ierr = ISDestroy(&sub_schurs->is_subs[i]);CHKERRQ(ierr); 557 ierr = ISDestroy(&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 558 ierr = MatDestroy(&sub_schurs->S_Ej[i]);CHKERRQ(ierr); 559 ierr = VecDestroy(&sub_schurs->work1[i]);CHKERRQ(ierr); 560 ierr = VecDestroy(&sub_schurs->work2[i]);CHKERRQ(ierr); 561 } 562 ierr = ISDestroy(&sub_schurs->is_I_layer);CHKERRQ(ierr); 563 if (sub_schurs->n_subs) { 564 ierr = PetscFree(sub_schurs->is_subs);CHKERRQ(ierr); 565 ierr = PetscFree4(sub_schurs->is_AEj_B,sub_schurs->S_Ej,sub_schurs->work1,sub_schurs->work2);CHKERRQ(ierr); 566 ierr = PetscFree2(sub_schurs->index_sequential,sub_schurs->index_parallel);CHKERRQ(ierr); 567 ierr = PetscFree(sub_schurs->auxglobal_sequential);CHKERRQ(ierr); 568 ierr = PetscFree(sub_schurs->auxglobal_parallel);CHKERRQ(ierr); 569 } 570 sub_schurs->n_subs = 0; 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "PCBDDCAdjGetNextLayer_Private" 576 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added) 577 { 578 PetscInt i,j,n; 579 PetscErrorCode ierr; 580 581 PetscFunctionBegin; 582 n = 0; 583 for (i=-n_prev;i<0;i++) { 584 PetscInt start_dof = queue_tip[i]; 585 for (j=xadj[start_dof];j<xadj[start_dof+1];j++) { 586 PetscInt dof = adjncy[j]; 587 if (!PetscBTLookup(touched,dof)) { 588 ierr = PetscBTSet(touched,dof);CHKERRQ(ierr); 589 queue_tip[n] = dof; 590 n++; 591 } 592 } 593 } 594 *n_added = n; 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNCT__ 599 #define __FUNCT__ "PCBDDCSubSchursSetUp" 600 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat S, IS is_A_I, IS is_A_B, PetscInt ncc, IS is_cc[], PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers) 601 { 602 Mat A_II,A_IB,A_BI,A_BB; 603 Mat AE_II,*AE_IE,*AE_EI,*AE_EE; 604 IS is_I,*is_subset_B; 605 ISLocalToGlobalMapping BtoNmap; 606 PetscInt i; 607 PetscBool is_sorted; 608 PetscErrorCode ierr; 609 610 PetscFunctionBegin; 611 ierr = ISSorted(is_A_I,&is_sorted);CHKERRQ(ierr); 612 if (!is_sorted) { 613 SETERRQ(PetscObjectComm((PetscObject)is_A_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 614 } 615 ierr = ISSorted(is_A_B,&is_sorted);CHKERRQ(ierr); 616 if (!is_sorted) { 617 SETERRQ(PetscObjectComm((PetscObject)is_A_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 618 } 619 620 /* get Schur complement matrices */ 621 ierr = MatSchurComplementGetSubMatrices(S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr); 622 623 /* allocate space for schur complements */ 624 sub_schurs->n_subs = ncc; 625 ierr = PetscMalloc4(sub_schurs->n_subs,&sub_schurs->is_AEj_B, 626 sub_schurs->n_subs,&sub_schurs->S_Ej, 627 sub_schurs->n_subs,&sub_schurs->work1, 628 sub_schurs->n_subs,&sub_schurs->work2);CHKERRQ(ierr); 629 ierr = PetscMalloc4(ncc,&is_subset_B,ncc,&AE_IE,ncc,&AE_EI,ncc,&AE_EE);CHKERRQ(ierr); 630 631 /* maps */ 632 if (sub_schurs->n_subs && nlayers >= 0 && xadj != NULL && adjncy != NULL) { /* Interior problems can be different from the original one */ 633 ISLocalToGlobalMapping ItoNmap; 634 PetscBT touched; 635 const PetscInt* idx_B; 636 PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 637 638 /* get sizes */ 639 ierr = ISGetLocalSize(is_A_I,&n_I);CHKERRQ(ierr); 640 ierr = ISGetLocalSize(is_A_B,&n_B);CHKERRQ(ierr); 641 642 ierr = ISLocalToGlobalMappingCreateIS(is_A_I,&ItoNmap);CHKERRQ(ierr); 643 ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr); 644 ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr); 645 ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr); 646 647 /* all boundary dofs must be skipped when adding layers */ 648 ierr = ISGetIndices(is_A_B,&idx_B);CHKERRQ(ierr); 649 for (j=0;j<n_B;j++) { 650 ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr); 651 } 652 ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr); 653 ierr = ISRestoreIndices(is_A_B,&idx_B);CHKERRQ(ierr); 654 655 /* add next layers of dofs */ 656 n_local_dofs = n_B; 657 n_prev_added = n_B; 658 for (layer=0;layer<nlayers;layer++) { 659 PetscInt n_added; 660 if (n_local_dofs == n_I+n_B) break; 661 if (n_local_dofs > n_I+n_B) { 662 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B); 663 } 664 ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr); 665 n_prev_added = n_added; 666 n_local_dofs += n_added; 667 if (!n_added) break; 668 } 669 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 670 671 /* IS for I dofs in original numbering and in I numbering */ 672 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)ItoNmap),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_I_layer);CHKERRQ(ierr); 673 ierr = PetscFree(local_numbering);CHKERRQ(ierr); 674 ierr = ISSort(sub_schurs->is_I_layer);CHKERRQ(ierr); 675 ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_I_layer,&is_I);CHKERRQ(ierr); 676 ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr); 677 678 /* II block */ 679 ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr); 680 } else { 681 PetscInt n_I; 682 683 /* IS for I dofs in original numbering */ 684 ierr = PetscObjectReference((PetscObject)is_A_I);CHKERRQ(ierr); 685 sub_schurs->is_I_layer = is_A_I; 686 687 /* IS for I dofs in I numbering (strided 1) */ 688 ierr = ISGetSize(is_A_I,&n_I);CHKERRQ(ierr); 689 ierr = ISCreateStride(PetscObjectComm((PetscObject)is_A_I),n_I,0,1,&is_I);CHKERRQ(ierr); 690 691 /* II block is the same */ 692 ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr); 693 AE_II = A_II; 694 } 695 696 /* subsets in original and boundary numbering */ 697 ierr = ISLocalToGlobalMappingCreateIS(is_A_B,&BtoNmap);CHKERRQ(ierr); 698 for (i=0;i<sub_schurs->n_subs;i++) { 699 ierr = ISDuplicate(is_cc[i],&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 700 ierr = ISSort(sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 701 ierr = ISGlobalToLocalMappingApplyIS(BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[i],&is_subset_B[i]);CHKERRQ(ierr); 702 } 703 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 704 705 /* EE block */ 706 for (i=0;i<sub_schurs->n_subs;i++) { 707 ierr = MatGetSubMatrix(A_BB,is_subset_B[i],is_subset_B[i],MAT_INITIAL_MATRIX,&AE_EE[i]);CHKERRQ(ierr); 708 } 709 /* IE block */ 710 for (i=0;i<sub_schurs->n_subs;i++) { 711 ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B[i],MAT_INITIAL_MATRIX,&AE_IE[i]);CHKERRQ(ierr); 712 } 713 /* EI block */ 714 for (i=0;i<sub_schurs->n_subs;i++) { 715 ierr = MatGetSubMatrix(A_BI,is_subset_B[i],is_I,MAT_INITIAL_MATRIX,&AE_EI[i]);CHKERRQ(ierr); 716 } 717 718 /* setup Schur complements on subset */ 719 for (i=0;i<sub_schurs->n_subs;i++) { 720 ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE[i],AE_EI[i],AE_EE[i],&sub_schurs->S_Ej[i]);CHKERRQ(ierr); 721 ierr = MatCreateVecs(sub_schurs->S_Ej[i],&sub_schurs->work1[i],&sub_schurs->work2[i]);CHKERRQ(ierr); 722 if (AE_II == A_II) { /* we can reuse the same ksp */ 723 KSP ksp; 724 ierr = MatSchurComplementGetKSP(S,&ksp);CHKERRQ(ierr); 725 ierr = MatSchurComplementSetKSP(sub_schurs->S_Ej[i],ksp);CHKERRQ(ierr); 726 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 727 KSP origksp,schurksp; 728 PC origpc,schurpc; 729 KSPType ksp_type; 730 PCType pc_type; 731 PetscInt n_internal; 732 733 ierr = MatSchurComplementGetKSP(S,&origksp);CHKERRQ(ierr); 734 ierr = MatSchurComplementGetKSP(sub_schurs->S_Ej[i],&schurksp);CHKERRQ(ierr); 735 ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr); 736 ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr); 737 ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr); 738 ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr); 739 ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr); 740 ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr); 741 ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr); 742 if (n_internal) { /* UMFPACK gives error with 0 sized problems */ 743 MatSolverPackage solver=NULL; 744 ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr); 745 if (solver) { 746 ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr); 747 } 748 } 749 ierr = KSPSetUp(schurksp);CHKERRQ(ierr); 750 } 751 } 752 /* free */ 753 ierr = ISDestroy(&is_I);CHKERRQ(ierr); 754 ierr = MatDestroy(&AE_II);CHKERRQ(ierr); 755 for (i=0;i<sub_schurs->n_subs;i++) { 756 ierr = MatDestroy(&AE_EE[i]);CHKERRQ(ierr); 757 ierr = MatDestroy(&AE_IE[i]);CHKERRQ(ierr); 758 ierr = MatDestroy(&AE_EI[i]);CHKERRQ(ierr); 759 ierr = ISDestroy(&is_subset_B[i]);CHKERRQ(ierr); 760 } 761 ierr = PetscFree4(is_subset_B,AE_IE,AE_EI,AE_EE);CHKERRQ(ierr); 762 PetscFunctionReturn(0); 763 } 764