1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 4 static PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*); 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCSubSchursSetUpNew" 8 PetscErrorCode PCBDDCSubSchursSetUpNew(PCBDDCSubSchurs sub_schurs, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers) 9 { 10 Mat A_II,A_IB,A_BI,A_BB; 11 Mat AE_II,*AE_IE,*AE_EI,*AE_EE; 12 Mat global_schur_subsets,*submat_global_schur_subsets,work_mat; 13 ISLocalToGlobalMapping l2gmap_subsets; 14 IS is_I,*is_subset_B,temp_is; 15 PetscInt *nnz,*all_local_idx_G,*all_local_idx_B,*all_local_idx_N,*all_permutation_G; 16 PetscInt i,subset_size,max_subset_size; 17 PetscInt local_size,global_size; 18 PetscBool implicit_schurs; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 //ierr = PetscObjectTypeCompare((PetscObject)sub_schurs->S,MATSCHURCOMPLEMENT,&implicit_schurs);CHKERRQ(ierr); 23 implicit_schurs = PETSC_TRUE; 24 /* allocate space for schur complements */ 25 ierr = PetscMalloc5(sub_schurs->n_subs,&sub_schurs->is_AEj_I, 26 sub_schurs->n_subs,&sub_schurs->is_AEj_B, 27 sub_schurs->n_subs,&sub_schurs->S_Ej, 28 sub_schurs->n_subs,&sub_schurs->work1, 29 sub_schurs->n_subs,&sub_schurs->work2);CHKERRQ(ierr); 30 31 /* get Schur complement matrices */ 32 if (implicit_schurs) { 33 ierr = MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr); 34 ierr = PetscMalloc4(sub_schurs->n_subs,&is_subset_B, 35 sub_schurs->n_subs,&AE_IE, 36 sub_schurs->n_subs,&AE_EI, 37 sub_schurs->n_subs,&AE_EE);CHKERRQ(ierr); 38 } 39 40 /* determine interior problems */ 41 if (nlayers >= 0 && xadj != NULL && adjncy != NULL) { /* Interior problems can be different from the original one */ 42 PetscBT touched; 43 const PetscInt* idx_B; 44 PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 45 46 /* get sizes */ 47 ierr = ISGetLocalSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr); 48 ierr = ISGetLocalSize(sub_schurs->is_B,&n_B);CHKERRQ(ierr); 49 50 ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr); 51 ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr); 52 ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr); 53 54 /* all boundary dofs must be skipped when adding layers */ 55 ierr = ISGetIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr); 56 for (j=0;j<n_B;j++) { 57 ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr); 58 } 59 ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr); 60 ierr = ISRestoreIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr); 61 62 /* add prescribed number of layers of dofs */ 63 n_local_dofs = n_B; 64 n_prev_added = n_B; 65 for (layer=0;layer<nlayers;layer++) { 66 PetscInt n_added; 67 if (n_local_dofs == n_I+n_B) break; 68 if (n_local_dofs > n_I+n_B) { 69 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B); 70 } 71 ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr); 72 n_prev_added = n_added; 73 n_local_dofs += n_added; 74 if (!n_added) break; 75 } 76 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 77 78 /* IS for I dofs in original numbering */ 79 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_AEj_I[0]);CHKERRQ(ierr); 80 ierr = PetscFree(local_numbering);CHKERRQ(ierr); 81 ierr = ISSort(sub_schurs->is_AEj_I[0]);CHKERRQ(ierr); 82 /* IS for I dofs in boundary numbering */ 83 if (implicit_schurs) { 84 ISLocalToGlobalMapping ItoNmap; 85 ierr = ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);CHKERRQ(ierr); 86 ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_I[0],&is_I);CHKERRQ(ierr); 87 ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr); 88 89 /* II block */ 90 ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr); 91 } 92 } else { 93 PetscInt n_I; 94 95 /* IS for I dofs in original numbering */ 96 ierr = PetscObjectReference((PetscObject)sub_schurs->is_I);CHKERRQ(ierr); 97 sub_schurs->is_AEj_I[0] = sub_schurs->is_I; 98 99 /* IS for I dofs in I numbering (strided 1) */ 100 if (implicit_schurs) { 101 ierr = ISGetSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr); 102 ierr = ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);CHKERRQ(ierr); 103 104 /* II block is the same */ 105 ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr); 106 AE_II = A_II; 107 } 108 } 109 110 /* TODO: just for compatibility with the previous version, needs to be fixed */ 111 for (i=1;i<sub_schurs->n_subs;i++) { 112 ierr = PetscObjectReference((PetscObject)sub_schurs->is_AEj_I[0]);CHKERRQ(ierr); 113 sub_schurs->is_AEj_I[i] = sub_schurs->is_AEj_I[0]; 114 } 115 116 if (implicit_schurs) { 117 /* subsets in original and boundary numbering */ 118 for (i=0;i<sub_schurs->n_subs;i++) { 119 ierr = ISDuplicate(sub_schurs->is_subs[i],&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 120 ierr = ISSort(sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 121 ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[i],&is_subset_B[i]);CHKERRQ(ierr); 122 } 123 124 /* EE block */ 125 for (i=0;i<sub_schurs->n_subs;i++) { 126 ierr = MatGetSubMatrix(A_BB,is_subset_B[i],is_subset_B[i],MAT_INITIAL_MATRIX,&AE_EE[i]);CHKERRQ(ierr); 127 } 128 /* IE block */ 129 for (i=0;i<sub_schurs->n_subs;i++) { 130 ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B[i],MAT_INITIAL_MATRIX,&AE_IE[i]);CHKERRQ(ierr); 131 } 132 /* EI block */ 133 for (i=0;i<sub_schurs->n_subs;i++) { 134 ierr = MatGetSubMatrix(A_BI,is_subset_B[i],is_I,MAT_INITIAL_MATRIX,&AE_EI[i]);CHKERRQ(ierr); 135 } 136 137 /* setup Schur complements on subset */ 138 for (i=0;i<sub_schurs->n_subs;i++) { 139 ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE[i],AE_EI[i],AE_EE[i],&sub_schurs->S_Ej[i]);CHKERRQ(ierr); 140 if (AE_II == A_II) { /* we can reuse the same ksp */ 141 KSP ksp; 142 ierr = MatSchurComplementGetKSP(sub_schurs->S,&ksp);CHKERRQ(ierr); 143 ierr = MatSchurComplementSetKSP(sub_schurs->S_Ej[i],ksp);CHKERRQ(ierr); 144 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 145 KSP origksp,schurksp; 146 PC origpc,schurpc; 147 KSPType ksp_type; 148 PCType pc_type; 149 PetscInt n_internal; 150 151 ierr = MatSchurComplementGetKSP(sub_schurs->S,&origksp);CHKERRQ(ierr); 152 ierr = MatSchurComplementGetKSP(sub_schurs->S_Ej[i],&schurksp);CHKERRQ(ierr); 153 ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr); 154 ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr); 155 ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr); 156 ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr); 157 ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr); 158 ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr); 159 ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr); 160 if (n_internal) { /* UMFPACK gives error with 0 sized problems */ 161 MatSolverPackage solver=NULL; 162 ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr); 163 if (solver) { 164 ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr); 165 } 166 } 167 ierr = KSPSetUp(schurksp);CHKERRQ(ierr); 168 } 169 } 170 /* free */ 171 ierr = ISDestroy(&is_I);CHKERRQ(ierr); 172 ierr = MatDestroy(&AE_II);CHKERRQ(ierr); 173 for (i=0;i<sub_schurs->n_subs;i++) { 174 ierr = MatDestroy(&AE_EE[i]);CHKERRQ(ierr); 175 ierr = MatDestroy(&AE_IE[i]);CHKERRQ(ierr); 176 ierr = MatDestroy(&AE_EI[i]);CHKERRQ(ierr); 177 ierr = ISDestroy(&is_subset_B[i]);CHKERRQ(ierr); 178 } 179 ierr = PetscFree4(is_subset_B,AE_IE,AE_EI,AE_EE);CHKERRQ(ierr); 180 } 181 182 /* TODO: just for compatibility with the previous version, needs to be fixed */ 183 for (i=0;i<sub_schurs->n_subs_par;i++) { 184 PetscInt j = sub_schurs->index_parallel[i]; 185 ierr = MatCreateVecs(sub_schurs->S_Ej[j],&sub_schurs->work1[j],&sub_schurs->work2[j]);CHKERRQ(ierr); 186 } 187 for (i=0;i<sub_schurs->n_subs_seq;i++) { 188 sub_schurs->work1[sub_schurs->index_sequential[i]] = 0; 189 sub_schurs->work2[sub_schurs->index_sequential[i]] = 0; 190 } 191 192 if (!sub_schurs->n_subs_seq_g) { 193 sub_schurs->S_Ej_all = 0; 194 sub_schurs->sum_S_Ej_all = 0; 195 sub_schurs->is_Ej_all = 0; 196 PetscFunctionReturn(0); 197 } 198 199 /* Get info on subset sizes and sum of all subsets sizes */ 200 max_subset_size = 0; 201 local_size = 0; 202 for (i=0;i<sub_schurs->n_subs_seq;i++) { 203 PetscInt j = sub_schurs->index_sequential[i]; 204 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[j],&subset_size);CHKERRQ(ierr); 205 max_subset_size = PetscMax(subset_size,max_subset_size); 206 local_size += subset_size; 207 } 208 209 /* Work arrays for local indices */ 210 ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr); 211 ierr = PetscMalloc1(local_size,&all_local_idx_N);CHKERRQ(ierr); 212 ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr); 213 214 /* Get local indices in local whole numbering and local boundary numbering */ 215 local_size = 0; 216 for (i=0;i<sub_schurs->n_subs_seq;i++) { 217 PetscInt j; 218 const PetscInt *idxs; 219 220 PetscInt local_problem_index = sub_schurs->index_sequential[i]; 221 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 222 ierr = ISGetIndices(sub_schurs->is_AEj_B[local_problem_index],&idxs);CHKERRQ(ierr); 223 /* subset indices in local numbering */ 224 ierr = PetscMemcpy(all_local_idx_N+local_size,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 225 ierr = ISRestoreIndices(sub_schurs->is_AEj_B[local_problem_index],&idxs);CHKERRQ(ierr); 226 for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size; 227 local_size += subset_size; 228 } 229 230 /* subset indices in local boundary numbering */ 231 ierr = ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N,&subset_size,all_local_idx_B);CHKERRQ(ierr); 232 if (subset_size != local_size) { 233 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size); 234 } 235 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);CHKERRQ(ierr); 236 237 /* Number dofs on all subsets (parallel) and sort numbering */ 238 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->l2gmap,local_size,all_local_idx_N,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr); 239 ierr = PetscMalloc1(local_size,&all_permutation_G);CHKERRQ(ierr); 240 for (i=0;i<local_size;i++) { 241 all_permutation_G[i]=i; 242 } 243 ierr = PetscSortIntWithPermutation(local_size,all_local_idx_G,all_permutation_G);CHKERRQ(ierr); 244 245 /* Local matrix of all local Schur on subsets */ 246 ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr); 247 ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr); 248 ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr); 249 ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr); 250 ierr = PetscFree(nnz);CHKERRQ(ierr); 251 252 if (implicit_schurs) { 253 PetscScalar *fill_vals; 254 PetscInt *dummy_idx; 255 256 ierr = MatSetOption(sub_schurs->S_Ej_all,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 257 258 /* Work arrays */ 259 ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&fill_vals);CHKERRQ(ierr); 260 261 /* Loop on local problems to compute Schur complements explicitly (TODO; optimize)*/ 262 local_size = 0; 263 for (i=0;i<sub_schurs->n_subs_seq;i++) { 264 Vec work1,work2; 265 PetscInt j,local_problem_index = sub_schurs->index_sequential[i]; 266 267 ierr = MatCreateVecs(sub_schurs->S_Ej[local_problem_index],&work1,&work2);CHKERRQ(ierr); 268 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 269 /* local Schur */ 270 for (j=0;j<subset_size;j++) { 271 ierr = VecSet(work1,0.0);CHKERRQ(ierr); 272 ierr = VecSetValue(work1,j,1.0,INSERT_VALUES);CHKERRQ(ierr); 273 ierr = VecPlaceArray(work2,&fill_vals[j*subset_size]);CHKERRQ(ierr); 274 ierr = MatMult(sub_schurs->S_Ej[local_problem_index],work1,work2);CHKERRQ(ierr); 275 ierr = VecResetArray(work2);CHKERRQ(ierr); 276 } 277 for (j=0;j<subset_size;j++) { 278 dummy_idx[j]=local_size+j; 279 } 280 ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,fill_vals,INSERT_VALUES);CHKERRQ(ierr); 281 local_size += subset_size; 282 ierr = VecDestroy(&work1);CHKERRQ(ierr); 283 ierr = VecDestroy(&work2);CHKERRQ(ierr); 284 } 285 ierr = PetscFree2(dummy_idx,fill_vals);CHKERRQ(ierr); 286 } 287 ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 288 ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 289 290 /* Global matrix of all assembled Schur on subsets */ 291 ierr = ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),1,local_size,all_local_idx_G,PETSC_COPY_VALUES,&l2gmap_subsets);CHKERRQ(ierr); 292 ierr = MatCreateIS(PetscObjectComm((PetscObject)sub_schurs->l2gmap),1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);CHKERRQ(ierr); 293 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr); 294 ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr); 295 ierr = MatISGetMPIXAIJ(work_mat,MAT_INITIAL_MATRIX,&global_schur_subsets);CHKERRQ(ierr); 296 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 297 298 /* Get local part of (\sum_j S_Ej) */ 299 for (i=0;i<local_size;i++) { 300 all_local_idx_N[i] = all_local_idx_G[all_permutation_G[i]]; 301 } 302 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->l2gmap),local_size,all_local_idx_N,PETSC_OWN_POINTER,&temp_is);CHKERRQ(ierr); 303 ierr = MatGetSubMatrices(global_schur_subsets,1,&temp_is,&temp_is,MAT_INITIAL_MATRIX,&submat_global_schur_subsets);CHKERRQ(ierr); 304 ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr); 305 ierr = ISDestroy(&temp_is);CHKERRQ(ierr); 306 for (i=0;i<local_size;i++) { 307 all_local_idx_G[all_permutation_G[i]] = i; 308 } 309 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_G,PETSC_OWN_POINTER,&temp_is);CHKERRQ(ierr); 310 ierr = ISSetPermutation(temp_is);CHKERRQ(ierr); 311 ierr = MatPermute(submat_global_schur_subsets[0],temp_is,temp_is,&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 312 ierr = MatDestroyMatrices(1,&submat_global_schur_subsets);CHKERRQ(ierr); 313 ierr = ISDestroy(&temp_is);CHKERRQ(ierr); 314 ierr = PetscFree(all_permutation_G);CHKERRQ(ierr); 315 PetscFunctionReturn(0); 316 } 317 318 #undef __FUNCT__ 319 #define __FUNCT__ "PCBDDCSubSchursInit" 320 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, Mat A, Mat S, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscInt seqthreshold) 321 { 322 IS *faces,*edges,*all_cc; 323 PetscInt *index_sequential,*index_parallel; 324 PetscInt *auxlocal_sequential,*auxlocal_parallel; 325 PetscInt *auxglobal_sequential,*auxglobal_parallel; 326 PetscInt *auxmapping;//,*idxs; 327 PetscInt i,max_subset_size; 328 PetscInt n_sequential_problems,n_local_sequential_problems,n_parallel_problems,n_local_parallel_problems; 329 PetscInt n_faces,n_edges,n_all_cc; 330 PetscBool is_sorted; 331 PetscErrorCode ierr; 332 333 PetscFunctionBegin; 334 ierr = ISSorted(is_I,&is_sorted);CHKERRQ(ierr); 335 if (!is_sorted) { 336 SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 337 } 338 ierr = ISSorted(is_B,&is_sorted);CHKERRQ(ierr); 339 if (!is_sorted) { 340 SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 341 } 342 343 /* reset any previous data */ 344 ierr = PCBDDCSubSchursReset(sub_schurs);CHKERRQ(ierr); 345 346 /* get index sets for faces and edges */ 347 ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,NULL);CHKERRQ(ierr); 348 n_all_cc = n_faces+n_edges; 349 ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr); 350 for (i=0;i<n_faces;i++) { 351 all_cc[i] = faces[i]; 352 } 353 for (i=0;i<n_edges;i++) { 354 all_cc[n_faces+i] = edges[i]; 355 } 356 ierr = PetscFree(faces);CHKERRQ(ierr); 357 ierr = PetscFree(edges);CHKERRQ(ierr); 358 359 /* map interface's subsets */ 360 max_subset_size = 0; 361 for (i=0;i<n_all_cc;i++) { 362 PetscInt subset_size; 363 ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr); 364 max_subset_size = PetscMax(max_subset_size,subset_size); 365 } 366 ierr = PetscMalloc1(max_subset_size,&auxmapping);CHKERRQ(ierr); 367 ierr = PetscMalloc2(graph->ncc,&auxlocal_sequential, 368 graph->ncc,&auxlocal_parallel);CHKERRQ(ierr); 369 ierr = PetscMalloc2(graph->ncc,&index_sequential, 370 graph->ncc,&index_parallel);CHKERRQ(ierr); 371 372 /* if threshold is negative, uses all sequential problems */ 373 if (seqthreshold < 0) seqthreshold = max_subset_size; 374 375 /* determine which problem has to be solved in parallel or sequentially */ 376 n_local_sequential_problems = 0; 377 n_local_parallel_problems = 0; 378 for (i=0;i<n_all_cc;i++) { 379 PetscInt subset_size,j,min_loc = 0; 380 const PetscInt *idxs; 381 382 ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr); 383 ierr = ISGetIndices(all_cc[i],&idxs);CHKERRQ(ierr); 384 ierr = ISLocalToGlobalMappingApply(graph->l2gmap,subset_size,idxs,auxmapping);CHKERRQ(ierr); 385 for (j=1;j<subset_size;j++) { 386 if (auxmapping[j]<auxmapping[min_loc]) { 387 min_loc = j; 388 } 389 } 390 if (subset_size > seqthreshold) { 391 index_parallel[n_local_parallel_problems] = i; 392 auxlocal_parallel[n_local_parallel_problems] = idxs[min_loc]; 393 n_local_parallel_problems++; 394 } else { 395 index_sequential[n_local_sequential_problems] = i; 396 auxlocal_sequential[n_local_sequential_problems] = idxs[min_loc]; 397 n_local_sequential_problems++; 398 } 399 ierr = ISRestoreIndices(all_cc[i],&idxs);CHKERRQ(ierr); 400 } 401 402 /* Number parallel problems */ 403 auxglobal_parallel = 0; 404 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)graph->l2gmap),graph->l2gmap,n_local_parallel_problems,auxlocal_parallel,PETSC_NULL,&n_parallel_problems,&auxglobal_parallel);CHKERRQ(ierr); 405 406 /* Number sequential problems */ 407 auxglobal_sequential = 0; 408 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)graph->l2gmap),graph->l2gmap,n_local_sequential_problems,auxlocal_sequential,PETSC_NULL,&n_sequential_problems,&auxglobal_sequential);CHKERRQ(ierr); 409 410 /* update info in sub_schurs */ 411 if (A) { 412 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 413 sub_schurs->A = A; 414 } 415 ierr = PetscObjectReference((PetscObject)S);CHKERRQ(ierr); 416 sub_schurs->S = S; 417 ierr = PetscObjectReference((PetscObject)is_I);CHKERRQ(ierr); 418 sub_schurs->is_I = is_I; 419 ierr = PetscObjectReference((PetscObject)is_B);CHKERRQ(ierr); 420 sub_schurs->is_B = is_B; 421 ierr = PetscObjectReference((PetscObject)graph->l2gmap);CHKERRQ(ierr); 422 sub_schurs->l2gmap = graph->l2gmap; 423 ierr = PetscObjectReference((PetscObject)BtoNmap);CHKERRQ(ierr); 424 sub_schurs->BtoNmap = BtoNmap; 425 sub_schurs->n_subs_seq = n_local_sequential_problems; 426 sub_schurs->n_subs_par = n_local_parallel_problems; 427 sub_schurs->n_subs_seq_g = n_sequential_problems; 428 sub_schurs->n_subs_par_g = n_parallel_problems; 429 sub_schurs->n_subs = sub_schurs->n_subs_seq + sub_schurs->n_subs_par; 430 sub_schurs->is_subs = all_cc; 431 sub_schurs->index_sequential = index_sequential; 432 sub_schurs->index_parallel = index_parallel; 433 sub_schurs->auxglobal_sequential = auxglobal_sequential; 434 sub_schurs->auxglobal_parallel = auxglobal_parallel; 435 436 /* free workspace */ 437 ierr = PetscFree(auxmapping);CHKERRQ(ierr); 438 ierr = PetscFree2(auxlocal_sequential,auxlocal_parallel);CHKERRQ(ierr); 439 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "PCBDDCSubSchursCreate" 445 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 446 { 447 PCBDDCSubSchurs schurs_ctx; 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 ierr = PetscNew(&schurs_ctx);CHKERRQ(ierr); 452 schurs_ctx->n_subs = 0; 453 *sub_schurs = schurs_ctx; 454 PetscFunctionReturn(0); 455 } 456 457 #undef __FUNCT__ 458 #define __FUNCT__ "PCBDDCSubSchursDestroy" 459 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs) 460 { 461 PetscErrorCode ierr; 462 463 PetscFunctionBegin; 464 ierr = PCBDDCSubSchursReset(*sub_schurs);CHKERRQ(ierr); 465 ierr = PetscFree(*sub_schurs);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "PCBDDCSubSchursReset" 471 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 472 { 473 PetscInt i; 474 PetscErrorCode ierr; 475 476 PetscFunctionBegin; 477 ierr = MatDestroy(&sub_schurs->A);CHKERRQ(ierr); 478 ierr = MatDestroy(&sub_schurs->S);CHKERRQ(ierr); 479 ierr = ISDestroy(&sub_schurs->is_I);CHKERRQ(ierr); 480 ierr = ISDestroy(&sub_schurs->is_B);CHKERRQ(ierr); 481 ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);CHKERRQ(ierr); 482 ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);CHKERRQ(ierr); 483 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 484 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 485 ierr = ISDestroy(&sub_schurs->is_Ej_all);CHKERRQ(ierr); 486 for (i=0;i<sub_schurs->n_subs;i++) { 487 ierr = ISDestroy(&sub_schurs->is_subs[i]);CHKERRQ(ierr); 488 ierr = ISDestroy(&sub_schurs->is_AEj_I[i]);CHKERRQ(ierr); 489 ierr = ISDestroy(&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 490 ierr = MatDestroy(&sub_schurs->S_Ej[i]);CHKERRQ(ierr); 491 ierr = VecDestroy(&sub_schurs->work1[i]);CHKERRQ(ierr); 492 ierr = VecDestroy(&sub_schurs->work2[i]);CHKERRQ(ierr); 493 } 494 if (sub_schurs->n_subs) { 495 ierr = PetscFree(sub_schurs->is_subs);CHKERRQ(ierr); 496 ierr = PetscFree5(sub_schurs->is_AEj_I,sub_schurs->is_AEj_B,sub_schurs->S_Ej,sub_schurs->work1,sub_schurs->work2);CHKERRQ(ierr); 497 ierr = PetscFree2(sub_schurs->index_sequential,sub_schurs->index_parallel);CHKERRQ(ierr); 498 ierr = PetscFree(sub_schurs->auxglobal_sequential);CHKERRQ(ierr); 499 ierr = PetscFree(sub_schurs->auxglobal_parallel);CHKERRQ(ierr); 500 } 501 sub_schurs->n_subs = 0; 502 PetscFunctionReturn(0); 503 } 504 505 #undef __FUNCT__ 506 #define __FUNCT__ "PCBDDCAdjGetNextLayer_Private" 507 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added) 508 { 509 PetscInt i,j,n; 510 PetscErrorCode ierr; 511 512 PetscFunctionBegin; 513 n = 0; 514 for (i=-n_prev;i<0;i++) { 515 PetscInt start_dof = queue_tip[i]; 516 for (j=xadj[start_dof];j<xadj[start_dof+1];j++) { 517 PetscInt dof = adjncy[j]; 518 if (!PetscBTLookup(touched,dof)) { 519 ierr = PetscBTSet(touched,dof);CHKERRQ(ierr); 520 queue_tip[n] = dof; 521 n++; 522 } 523 } 524 } 525 *n_added = n; 526 PetscFunctionReturn(0); 527 } 528 529 #undef __FUNCT__ 530 #define __FUNCT__ "PCBDDCSubSchursSetUp" 531 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat S, IS is_A_I, IS is_A_B, PetscInt ncc, IS is_cc[], PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers) 532 { 533 Mat A_II,A_IB,A_BI,A_BB; 534 Mat AE_II,*AE_IE,*AE_EI,*AE_EE; 535 IS is_I,*is_subset_B; 536 ISLocalToGlobalMapping BtoNmap; 537 PetscInt i; 538 PetscBool is_sorted; 539 PetscErrorCode ierr; 540 541 PetscFunctionBegin; 542 ierr = ISSorted(is_A_I,&is_sorted);CHKERRQ(ierr); 543 if (!is_sorted) { 544 SETERRQ(PetscObjectComm((PetscObject)is_A_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 545 } 546 ierr = ISSorted(is_A_B,&is_sorted);CHKERRQ(ierr); 547 if (!is_sorted) { 548 SETERRQ(PetscObjectComm((PetscObject)is_A_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 549 } 550 551 /* get Schur complement matrices */ 552 ierr = MatSchurComplementGetSubMatrices(S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr); 553 554 /* allocate space for schur complements */ 555 ierr = PetscMalloc5(sub_schurs->n_subs,&sub_schurs->is_AEj_I, 556 sub_schurs->n_subs,&sub_schurs->is_AEj_B, 557 sub_schurs->n_subs,&sub_schurs->S_Ej, 558 sub_schurs->n_subs,&sub_schurs->work1, 559 sub_schurs->n_subs,&sub_schurs->work2);CHKERRQ(ierr); 560 ierr = PetscMalloc4(ncc,&is_subset_B,ncc,&AE_IE,ncc,&AE_EI,ncc,&AE_EE);CHKERRQ(ierr); 561 sub_schurs->n_subs = ncc; 562 563 /* maps */ 564 if (sub_schurs->n_subs && nlayers >= 0 && xadj != NULL && adjncy != NULL) { /* Interior problems can be different from the original one */ 565 ISLocalToGlobalMapping ItoNmap; 566 PetscBT touched; 567 const PetscInt* idx_B; 568 PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 569 570 /* get sizes */ 571 ierr = ISGetLocalSize(is_A_I,&n_I);CHKERRQ(ierr); 572 ierr = ISGetLocalSize(is_A_B,&n_B);CHKERRQ(ierr); 573 574 ierr = ISLocalToGlobalMappingCreateIS(is_A_I,&ItoNmap);CHKERRQ(ierr); 575 ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr); 576 ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr); 577 ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr); 578 579 /* all boundary dofs must be skipped when adding layers */ 580 ierr = ISGetIndices(is_A_B,&idx_B);CHKERRQ(ierr); 581 for (j=0;j<n_B;j++) { 582 ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr); 583 } 584 ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr); 585 ierr = ISRestoreIndices(is_A_B,&idx_B);CHKERRQ(ierr); 586 587 /* add next layers of dofs */ 588 n_local_dofs = n_B; 589 n_prev_added = n_B; 590 for (layer=0;layer<nlayers;layer++) { 591 PetscInt n_added; 592 if (n_local_dofs == n_I+n_B) break; 593 if (n_local_dofs > n_I+n_B) { 594 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B); 595 } 596 ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr); 597 n_prev_added = n_added; 598 n_local_dofs += n_added; 599 if (!n_added) break; 600 } 601 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 602 603 /* IS for I dofs in original numbering and in I numbering */ 604 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)ItoNmap),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_AEj_I[0]);CHKERRQ(ierr); 605 ierr = PetscFree(local_numbering);CHKERRQ(ierr); 606 ierr = ISSort(sub_schurs->is_AEj_I[0]);CHKERRQ(ierr); 607 ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_I[0],&is_I);CHKERRQ(ierr); 608 ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr); 609 610 /* II block */ 611 ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr); 612 } else { 613 PetscInt n_I; 614 615 /* IS for I dofs in original numbering */ 616 ierr = PetscObjectReference((PetscObject)is_A_I);CHKERRQ(ierr); 617 sub_schurs->is_AEj_I[0] = is_A_I; 618 619 /* IS for I dofs in I numbering (strided 1) */ 620 ierr = ISGetSize(is_A_I,&n_I);CHKERRQ(ierr); 621 ierr = ISCreateStride(PetscObjectComm((PetscObject)is_A_I),n_I,0,1,&is_I);CHKERRQ(ierr); 622 623 /* II block is the same */ 624 ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr); 625 AE_II = A_II; 626 } 627 628 /* TODO: just for compatibility with the previous version, needs to be fixed */ 629 for (i=1;i<sub_schurs->n_subs;i++) { 630 ierr = PetscObjectReference((PetscObject)sub_schurs->is_AEj_I[0]);CHKERRQ(ierr); 631 sub_schurs->is_AEj_I[i] = sub_schurs->is_AEj_I[0]; 632 } 633 634 /* subsets in original and boundary numbering */ 635 ierr = ISLocalToGlobalMappingCreateIS(is_A_B,&BtoNmap);CHKERRQ(ierr); 636 for (i=0;i<sub_schurs->n_subs;i++) { 637 ierr = ISDuplicate(is_cc[i],&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 638 ierr = ISSort(sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 639 ierr = ISGlobalToLocalMappingApplyIS(BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[i],&is_subset_B[i]);CHKERRQ(ierr); 640 } 641 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 642 643 /* EE block */ 644 for (i=0;i<sub_schurs->n_subs;i++) { 645 ierr = MatGetSubMatrix(A_BB,is_subset_B[i],is_subset_B[i],MAT_INITIAL_MATRIX,&AE_EE[i]);CHKERRQ(ierr); 646 } 647 /* IE block */ 648 for (i=0;i<sub_schurs->n_subs;i++) { 649 ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B[i],MAT_INITIAL_MATRIX,&AE_IE[i]);CHKERRQ(ierr); 650 } 651 /* EI block */ 652 for (i=0;i<sub_schurs->n_subs;i++) { 653 ierr = MatGetSubMatrix(A_BI,is_subset_B[i],is_I,MAT_INITIAL_MATRIX,&AE_EI[i]);CHKERRQ(ierr); 654 } 655 656 /* setup Schur complements on subset */ 657 for (i=0;i<sub_schurs->n_subs;i++) { 658 ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE[i],AE_EI[i],AE_EE[i],&sub_schurs->S_Ej[i]);CHKERRQ(ierr); 659 ierr = MatCreateVecs(sub_schurs->S_Ej[i],&sub_schurs->work1[i],&sub_schurs->work2[i]);CHKERRQ(ierr); 660 if (AE_II == A_II) { /* we can reuse the same ksp */ 661 KSP ksp; 662 ierr = MatSchurComplementGetKSP(S,&ksp);CHKERRQ(ierr); 663 ierr = MatSchurComplementSetKSP(sub_schurs->S_Ej[i],ksp);CHKERRQ(ierr); 664 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 665 KSP origksp,schurksp; 666 PC origpc,schurpc; 667 KSPType ksp_type; 668 PCType pc_type; 669 PetscInt n_internal; 670 671 ierr = MatSchurComplementGetKSP(S,&origksp);CHKERRQ(ierr); 672 ierr = MatSchurComplementGetKSP(sub_schurs->S_Ej[i],&schurksp);CHKERRQ(ierr); 673 ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr); 674 ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr); 675 ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr); 676 ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr); 677 ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr); 678 ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr); 679 ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr); 680 if (n_internal) { /* UMFPACK gives error with 0 sized problems */ 681 MatSolverPackage solver=NULL; 682 ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr); 683 if (solver) { 684 ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr); 685 } 686 } 687 ierr = KSPSetUp(schurksp);CHKERRQ(ierr); 688 } 689 } 690 /* free */ 691 ierr = ISDestroy(&is_I);CHKERRQ(ierr); 692 ierr = MatDestroy(&AE_II);CHKERRQ(ierr); 693 for (i=0;i<sub_schurs->n_subs;i++) { 694 ierr = MatDestroy(&AE_EE[i]);CHKERRQ(ierr); 695 ierr = MatDestroy(&AE_IE[i]);CHKERRQ(ierr); 696 ierr = MatDestroy(&AE_EI[i]);CHKERRQ(ierr); 697 ierr = ISDestroy(&is_subset_B[i]);CHKERRQ(ierr); 698 } 699 ierr = PetscFree4(is_subset_B,AE_IE,AE_EI,AE_EE);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702