1 #include <../src/ksp/pc/impls/bddc/bddc.h> 2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 3 4 static PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*); 5 6 #undef __FUNCT__ 7 #define __FUNCT__ "PCBDDCSubSchursSetUpNew" 8 PetscErrorCode PCBDDCSubSchursSetUpNew(PCBDDCSubSchurs sub_schurs, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers) 9 { 10 Mat A_II,A_IB,A_BI,A_BB; 11 Mat AE_II,*AE_IE,*AE_EI,*AE_EE; 12 Mat global_schur_subsets,*submat_global_schur_subsets,work_mat; 13 ISLocalToGlobalMapping l2gmap_subsets; 14 IS is_I,*is_subset_B,temp_is; 15 PetscInt *nnz,*all_local_idx_G,*all_local_idx_B,*all_local_idx_N,*all_permutation_G; 16 PetscInt i,subset_size,max_subset_size; 17 PetscInt local_size,global_size; 18 PetscBool implicit_schurs; 19 PetscErrorCode ierr; 20 21 PetscFunctionBegin; 22 //ierr = PetscObjectTypeCompare((PetscObject)sub_schurs->S,MATSCHURCOMPLEMENT,&implicit_schurs);CHKERRQ(ierr); 23 implicit_schurs = PETSC_TRUE; 24 /* allocate space for schur complements */ 25 ierr = PetscMalloc4(sub_schurs->n_subs,&sub_schurs->is_AEj_B, 26 sub_schurs->n_subs,&sub_schurs->S_Ej, 27 sub_schurs->n_subs,&sub_schurs->work1, 28 sub_schurs->n_subs,&sub_schurs->work2);CHKERRQ(ierr); 29 30 /* get Schur complement matrices */ 31 if (implicit_schurs) { 32 ierr = MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr); 33 ierr = PetscMalloc4(sub_schurs->n_subs,&is_subset_B, 34 sub_schurs->n_subs,&AE_IE, 35 sub_schurs->n_subs,&AE_EI, 36 sub_schurs->n_subs,&AE_EE);CHKERRQ(ierr); 37 } 38 39 /* determine interior problems */ 40 if (nlayers >= 0 && xadj != NULL && adjncy != NULL) { /* Interior problems can be different from the original one */ 41 PetscBT touched; 42 const PetscInt* idx_B; 43 PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 44 45 /* get sizes */ 46 ierr = ISGetLocalSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr); 47 ierr = ISGetLocalSize(sub_schurs->is_B,&n_B);CHKERRQ(ierr); 48 49 ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr); 50 ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr); 51 ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr); 52 53 /* all boundary dofs must be skipped when adding layers */ 54 ierr = ISGetIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr); 55 for (j=0;j<n_B;j++) { 56 ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr); 57 } 58 ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr); 59 ierr = ISRestoreIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr); 60 61 /* add prescribed number of layers of dofs */ 62 n_local_dofs = n_B; 63 n_prev_added = n_B; 64 for (layer=0;layer<nlayers;layer++) { 65 PetscInt n_added; 66 if (n_local_dofs == n_I+n_B) break; 67 if (n_local_dofs > n_I+n_B) { 68 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B); 69 } 70 ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr); 71 n_prev_added = n_added; 72 n_local_dofs += n_added; 73 if (!n_added) break; 74 } 75 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 76 77 /* IS for I dofs in original numbering */ 78 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_I_layer);CHKERRQ(ierr); 79 ierr = PetscFree(local_numbering);CHKERRQ(ierr); 80 ierr = ISSort(sub_schurs->is_I_layer);CHKERRQ(ierr); 81 /* IS for I dofs in boundary numbering */ 82 if (implicit_schurs) { 83 ISLocalToGlobalMapping ItoNmap; 84 ierr = ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);CHKERRQ(ierr); 85 ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_I_layer,&is_I);CHKERRQ(ierr); 86 ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr); 87 88 /* II block */ 89 ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr); 90 } 91 } else { 92 PetscInt n_I; 93 94 /* IS for I dofs in original numbering */ 95 ierr = PetscObjectReference((PetscObject)sub_schurs->is_I);CHKERRQ(ierr); 96 sub_schurs->is_I_layer = sub_schurs->is_I; 97 98 /* IS for I dofs in I numbering (strided 1) */ 99 if (implicit_schurs) { 100 ierr = ISGetSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr); 101 ierr = ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);CHKERRQ(ierr); 102 103 /* II block is the same */ 104 ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr); 105 AE_II = A_II; 106 } 107 } 108 109 if (implicit_schurs) { 110 /* subsets in original and boundary numbering */ 111 for (i=0;i<sub_schurs->n_subs;i++) { 112 ierr = ISDuplicate(sub_schurs->is_subs[i],&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 113 ierr = ISSort(sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 114 ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[i],&is_subset_B[i]);CHKERRQ(ierr); 115 } 116 117 /* EE block */ 118 for (i=0;i<sub_schurs->n_subs;i++) { 119 ierr = MatGetSubMatrix(A_BB,is_subset_B[i],is_subset_B[i],MAT_INITIAL_MATRIX,&AE_EE[i]);CHKERRQ(ierr); 120 } 121 /* IE block */ 122 for (i=0;i<sub_schurs->n_subs;i++) { 123 ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B[i],MAT_INITIAL_MATRIX,&AE_IE[i]);CHKERRQ(ierr); 124 } 125 /* EI block */ 126 for (i=0;i<sub_schurs->n_subs;i++) { 127 ierr = MatGetSubMatrix(A_BI,is_subset_B[i],is_I,MAT_INITIAL_MATRIX,&AE_EI[i]);CHKERRQ(ierr); 128 } 129 130 /* setup Schur complements on subset */ 131 for (i=0;i<sub_schurs->n_subs;i++) { 132 ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE[i],AE_EI[i],AE_EE[i],&sub_schurs->S_Ej[i]);CHKERRQ(ierr); 133 if (AE_II == A_II) { /* we can reuse the same ksp */ 134 KSP ksp; 135 ierr = MatSchurComplementGetKSP(sub_schurs->S,&ksp);CHKERRQ(ierr); 136 ierr = MatSchurComplementSetKSP(sub_schurs->S_Ej[i],ksp);CHKERRQ(ierr); 137 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 138 KSP origksp,schurksp; 139 PC origpc,schurpc; 140 KSPType ksp_type; 141 PCType pc_type; 142 PetscInt n_internal; 143 144 ierr = MatSchurComplementGetKSP(sub_schurs->S,&origksp);CHKERRQ(ierr); 145 ierr = MatSchurComplementGetKSP(sub_schurs->S_Ej[i],&schurksp);CHKERRQ(ierr); 146 ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr); 147 ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr); 148 ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr); 149 ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr); 150 ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr); 151 ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr); 152 ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr); 153 if (n_internal) { /* UMFPACK gives error with 0 sized problems */ 154 MatSolverPackage solver=NULL; 155 ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr); 156 if (solver) { 157 ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr); 158 } 159 } 160 ierr = KSPSetUp(schurksp);CHKERRQ(ierr); 161 } 162 } 163 /* free */ 164 ierr = ISDestroy(&is_I);CHKERRQ(ierr); 165 ierr = MatDestroy(&AE_II);CHKERRQ(ierr); 166 for (i=0;i<sub_schurs->n_subs;i++) { 167 ierr = MatDestroy(&AE_EE[i]);CHKERRQ(ierr); 168 ierr = MatDestroy(&AE_IE[i]);CHKERRQ(ierr); 169 ierr = MatDestroy(&AE_EI[i]);CHKERRQ(ierr); 170 ierr = ISDestroy(&is_subset_B[i]);CHKERRQ(ierr); 171 } 172 ierr = PetscFree4(is_subset_B,AE_IE,AE_EI,AE_EE);CHKERRQ(ierr); 173 } 174 175 /* TODO: just for compatibility with the previous version, needs to be fixed */ 176 for (i=0;i<sub_schurs->n_subs_par;i++) { 177 PetscInt j = sub_schurs->index_parallel[i]; 178 ierr = MatCreateVecs(sub_schurs->S_Ej[j],&sub_schurs->work1[j],&sub_schurs->work2[j]);CHKERRQ(ierr); 179 } 180 for (i=0;i<sub_schurs->n_subs_seq;i++) { 181 sub_schurs->work1[sub_schurs->index_sequential[i]] = 0; 182 sub_schurs->work2[sub_schurs->index_sequential[i]] = 0; 183 } 184 185 if (!sub_schurs->n_subs_seq_g) { 186 sub_schurs->S_Ej_all = 0; 187 sub_schurs->sum_S_Ej_all = 0; 188 sub_schurs->is_Ej_all = 0; 189 PetscFunctionReturn(0); 190 } 191 192 /* Get info on subset sizes and sum of all subsets sizes */ 193 max_subset_size = 0; 194 local_size = 0; 195 for (i=0;i<sub_schurs->n_subs_seq;i++) { 196 PetscInt j = sub_schurs->index_sequential[i]; 197 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[j],&subset_size);CHKERRQ(ierr); 198 max_subset_size = PetscMax(subset_size,max_subset_size); 199 local_size += subset_size; 200 } 201 202 /* Work arrays for local indices */ 203 ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr); 204 ierr = PetscMalloc1(local_size,&all_local_idx_N);CHKERRQ(ierr); 205 ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr); 206 207 /* Get local indices in local whole numbering and local boundary numbering */ 208 local_size = 0; 209 for (i=0;i<sub_schurs->n_subs_seq;i++) { 210 PetscInt j; 211 const PetscInt *idxs; 212 213 PetscInt local_problem_index = sub_schurs->index_sequential[i]; 214 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 215 ierr = ISGetIndices(sub_schurs->is_AEj_B[local_problem_index],&idxs);CHKERRQ(ierr); 216 /* subset indices in local numbering */ 217 ierr = PetscMemcpy(all_local_idx_N+local_size,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 218 ierr = ISRestoreIndices(sub_schurs->is_AEj_B[local_problem_index],&idxs);CHKERRQ(ierr); 219 for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size; 220 local_size += subset_size; 221 } 222 223 /* subset indices in local boundary numbering */ 224 ierr = ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N,&subset_size,all_local_idx_B);CHKERRQ(ierr); 225 if (subset_size != local_size) { 226 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size); 227 } 228 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);CHKERRQ(ierr); 229 230 /* Number dofs on all subsets (parallel) and sort numbering */ 231 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->l2gmap,local_size,all_local_idx_N,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr); 232 ierr = PetscMalloc1(local_size,&all_permutation_G);CHKERRQ(ierr); 233 for (i=0;i<local_size;i++) { 234 all_permutation_G[i]=i; 235 } 236 ierr = PetscSortIntWithPermutation(local_size,all_local_idx_G,all_permutation_G);CHKERRQ(ierr); 237 238 /* Local matrix of all local Schur on subsets */ 239 ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr); 240 ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr); 241 ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr); 242 ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr); 243 ierr = PetscFree(nnz);CHKERRQ(ierr); 244 245 if (implicit_schurs) { 246 PetscScalar *fill_vals; 247 PetscInt *dummy_idx; 248 249 ierr = MatSetOption(sub_schurs->S_Ej_all,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 250 251 /* Work arrays */ 252 ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&fill_vals);CHKERRQ(ierr); 253 254 /* Loop on local problems to compute Schur complements explicitly (TODO; optimize)*/ 255 local_size = 0; 256 for (i=0;i<sub_schurs->n_subs_seq;i++) { 257 Vec work1,work2; 258 PetscInt j,local_problem_index = sub_schurs->index_sequential[i]; 259 260 ierr = MatCreateVecs(sub_schurs->S_Ej[local_problem_index],&work1,&work2);CHKERRQ(ierr); 261 ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr); 262 /* local Schur */ 263 for (j=0;j<subset_size;j++) { 264 ierr = VecSet(work1,0.0);CHKERRQ(ierr); 265 ierr = VecSetValue(work1,j,1.0,INSERT_VALUES);CHKERRQ(ierr); 266 ierr = VecPlaceArray(work2,&fill_vals[j*subset_size]);CHKERRQ(ierr); 267 ierr = MatMult(sub_schurs->S_Ej[local_problem_index],work1,work2);CHKERRQ(ierr); 268 ierr = VecResetArray(work2);CHKERRQ(ierr); 269 } 270 for (j=0;j<subset_size;j++) { 271 dummy_idx[j]=local_size+j; 272 } 273 ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,fill_vals,INSERT_VALUES);CHKERRQ(ierr); 274 local_size += subset_size; 275 ierr = VecDestroy(&work1);CHKERRQ(ierr); 276 ierr = VecDestroy(&work2);CHKERRQ(ierr); 277 } 278 ierr = PetscFree2(dummy_idx,fill_vals);CHKERRQ(ierr); 279 } 280 ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 281 ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 282 283 /* Global matrix of all assembled Schur on subsets */ 284 ierr = ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),1,local_size,all_local_idx_G,PETSC_COPY_VALUES,&l2gmap_subsets);CHKERRQ(ierr); 285 ierr = MatCreateIS(PetscObjectComm((PetscObject)sub_schurs->l2gmap),1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);CHKERRQ(ierr); 286 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr); 287 ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr); 288 ierr = MatISGetMPIXAIJ(work_mat,MAT_INITIAL_MATRIX,&global_schur_subsets);CHKERRQ(ierr); 289 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 290 291 /* Get local part of (\sum_j S_Ej) */ 292 for (i=0;i<local_size;i++) { 293 all_local_idx_N[i] = all_local_idx_G[all_permutation_G[i]]; 294 } 295 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->l2gmap),local_size,all_local_idx_N,PETSC_OWN_POINTER,&temp_is);CHKERRQ(ierr); 296 ierr = MatGetSubMatrices(global_schur_subsets,1,&temp_is,&temp_is,MAT_INITIAL_MATRIX,&submat_global_schur_subsets);CHKERRQ(ierr); 297 ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr); 298 ierr = ISDestroy(&temp_is);CHKERRQ(ierr); 299 for (i=0;i<local_size;i++) { 300 all_local_idx_G[all_permutation_G[i]] = i; 301 } 302 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_G,PETSC_OWN_POINTER,&temp_is);CHKERRQ(ierr); 303 ierr = ISSetPermutation(temp_is);CHKERRQ(ierr); 304 ierr = MatPermute(submat_global_schur_subsets[0],temp_is,temp_is,&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 305 ierr = MatDestroyMatrices(1,&submat_global_schur_subsets);CHKERRQ(ierr); 306 ierr = ISDestroy(&temp_is);CHKERRQ(ierr); 307 ierr = PetscFree(all_permutation_G);CHKERRQ(ierr); 308 PetscFunctionReturn(0); 309 } 310 311 #undef __FUNCT__ 312 #define __FUNCT__ "PCBDDCSubSchursInit" 313 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, Mat A, Mat S, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscInt seqthreshold) 314 { 315 IS *faces,*edges,*all_cc; 316 PetscInt *index_sequential,*index_parallel; 317 PetscInt *auxlocal_sequential,*auxlocal_parallel; 318 PetscInt *auxglobal_sequential,*auxglobal_parallel; 319 PetscInt *auxmapping;//,*idxs; 320 PetscInt i,max_subset_size; 321 PetscInt n_sequential_problems,n_local_sequential_problems,n_parallel_problems,n_local_parallel_problems; 322 PetscInt n_faces,n_edges,n_all_cc; 323 PetscBool is_sorted; 324 PetscErrorCode ierr; 325 326 PetscFunctionBegin; 327 ierr = ISSorted(is_I,&is_sorted);CHKERRQ(ierr); 328 if (!is_sorted) { 329 SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 330 } 331 ierr = ISSorted(is_B,&is_sorted);CHKERRQ(ierr); 332 if (!is_sorted) { 333 SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 334 } 335 336 /* reset any previous data */ 337 ierr = PCBDDCSubSchursReset(sub_schurs);CHKERRQ(ierr); 338 339 /* get index sets for faces and edges */ 340 ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,NULL);CHKERRQ(ierr); 341 n_all_cc = n_faces+n_edges; 342 ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr); 343 for (i=0;i<n_faces;i++) { 344 all_cc[i] = faces[i]; 345 } 346 for (i=0;i<n_edges;i++) { 347 all_cc[n_faces+i] = edges[i]; 348 } 349 ierr = PetscFree(faces);CHKERRQ(ierr); 350 ierr = PetscFree(edges);CHKERRQ(ierr); 351 352 /* map interface's subsets */ 353 max_subset_size = 0; 354 for (i=0;i<n_all_cc;i++) { 355 PetscInt subset_size; 356 ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr); 357 max_subset_size = PetscMax(max_subset_size,subset_size); 358 } 359 ierr = PetscMalloc1(max_subset_size,&auxmapping);CHKERRQ(ierr); 360 ierr = PetscMalloc2(graph->ncc,&auxlocal_sequential, 361 graph->ncc,&auxlocal_parallel);CHKERRQ(ierr); 362 ierr = PetscMalloc2(graph->ncc,&index_sequential, 363 graph->ncc,&index_parallel);CHKERRQ(ierr); 364 365 /* if threshold is negative, uses all sequential problems */ 366 if (seqthreshold < 0) seqthreshold = max_subset_size; 367 368 /* determine which problem has to be solved in parallel or sequentially */ 369 n_local_sequential_problems = 0; 370 n_local_parallel_problems = 0; 371 for (i=0;i<n_all_cc;i++) { 372 PetscInt subset_size,j,min_loc = 0; 373 const PetscInt *idxs; 374 375 ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr); 376 ierr = ISGetIndices(all_cc[i],&idxs);CHKERRQ(ierr); 377 ierr = ISLocalToGlobalMappingApply(graph->l2gmap,subset_size,idxs,auxmapping);CHKERRQ(ierr); 378 for (j=1;j<subset_size;j++) { 379 if (auxmapping[j]<auxmapping[min_loc]) { 380 min_loc = j; 381 } 382 } 383 if (subset_size > seqthreshold) { 384 index_parallel[n_local_parallel_problems] = i; 385 auxlocal_parallel[n_local_parallel_problems] = idxs[min_loc]; 386 n_local_parallel_problems++; 387 } else { 388 index_sequential[n_local_sequential_problems] = i; 389 auxlocal_sequential[n_local_sequential_problems] = idxs[min_loc]; 390 n_local_sequential_problems++; 391 } 392 ierr = ISRestoreIndices(all_cc[i],&idxs);CHKERRQ(ierr); 393 } 394 395 /* Number parallel problems */ 396 auxglobal_parallel = 0; 397 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)graph->l2gmap),graph->l2gmap,n_local_parallel_problems,auxlocal_parallel,PETSC_NULL,&n_parallel_problems,&auxglobal_parallel);CHKERRQ(ierr); 398 399 /* Number sequential problems */ 400 auxglobal_sequential = 0; 401 ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)graph->l2gmap),graph->l2gmap,n_local_sequential_problems,auxlocal_sequential,PETSC_NULL,&n_sequential_problems,&auxglobal_sequential);CHKERRQ(ierr); 402 403 /* update info in sub_schurs */ 404 if (A) { 405 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 406 sub_schurs->A = A; 407 } 408 ierr = PetscObjectReference((PetscObject)S);CHKERRQ(ierr); 409 sub_schurs->S = S; 410 ierr = PetscObjectReference((PetscObject)is_I);CHKERRQ(ierr); 411 sub_schurs->is_I = is_I; 412 ierr = PetscObjectReference((PetscObject)is_B);CHKERRQ(ierr); 413 sub_schurs->is_B = is_B; 414 ierr = PetscObjectReference((PetscObject)graph->l2gmap);CHKERRQ(ierr); 415 sub_schurs->l2gmap = graph->l2gmap; 416 ierr = PetscObjectReference((PetscObject)BtoNmap);CHKERRQ(ierr); 417 sub_schurs->BtoNmap = BtoNmap; 418 sub_schurs->n_subs_seq = n_local_sequential_problems; 419 sub_schurs->n_subs_par = n_local_parallel_problems; 420 sub_schurs->n_subs_seq_g = n_sequential_problems; 421 sub_schurs->n_subs_par_g = n_parallel_problems; 422 sub_schurs->n_subs = sub_schurs->n_subs_seq + sub_schurs->n_subs_par; 423 sub_schurs->is_subs = all_cc; 424 sub_schurs->index_sequential = index_sequential; 425 sub_schurs->index_parallel = index_parallel; 426 sub_schurs->auxglobal_sequential = auxglobal_sequential; 427 sub_schurs->auxglobal_parallel = auxglobal_parallel; 428 429 /* free workspace */ 430 ierr = PetscFree(auxmapping);CHKERRQ(ierr); 431 ierr = PetscFree2(auxlocal_sequential,auxlocal_parallel);CHKERRQ(ierr); 432 433 PetscFunctionReturn(0); 434 } 435 436 #undef __FUNCT__ 437 #define __FUNCT__ "PCBDDCSubSchursCreate" 438 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) 439 { 440 PCBDDCSubSchurs schurs_ctx; 441 PetscErrorCode ierr; 442 443 PetscFunctionBegin; 444 ierr = PetscNew(&schurs_ctx);CHKERRQ(ierr); 445 schurs_ctx->n_subs = 0; 446 *sub_schurs = schurs_ctx; 447 PetscFunctionReturn(0); 448 } 449 450 #undef __FUNCT__ 451 #define __FUNCT__ "PCBDDCSubSchursDestroy" 452 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs) 453 { 454 PetscErrorCode ierr; 455 456 PetscFunctionBegin; 457 ierr = PCBDDCSubSchursReset(*sub_schurs);CHKERRQ(ierr); 458 ierr = PetscFree(*sub_schurs);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "PCBDDCSubSchursReset" 464 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) 465 { 466 PetscInt i; 467 PetscErrorCode ierr; 468 469 PetscFunctionBegin; 470 ierr = MatDestroy(&sub_schurs->A);CHKERRQ(ierr); 471 ierr = MatDestroy(&sub_schurs->S);CHKERRQ(ierr); 472 ierr = ISDestroy(&sub_schurs->is_I);CHKERRQ(ierr); 473 ierr = ISDestroy(&sub_schurs->is_B);CHKERRQ(ierr); 474 ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);CHKERRQ(ierr); 475 ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);CHKERRQ(ierr); 476 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 477 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 478 ierr = ISDestroy(&sub_schurs->is_Ej_all);CHKERRQ(ierr); 479 for (i=0;i<sub_schurs->n_subs;i++) { 480 ierr = ISDestroy(&sub_schurs->is_subs[i]);CHKERRQ(ierr); 481 ierr = ISDestroy(&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 482 ierr = MatDestroy(&sub_schurs->S_Ej[i]);CHKERRQ(ierr); 483 ierr = VecDestroy(&sub_schurs->work1[i]);CHKERRQ(ierr); 484 ierr = VecDestroy(&sub_schurs->work2[i]);CHKERRQ(ierr); 485 } 486 ierr = ISDestroy(&sub_schurs->is_I_layer);CHKERRQ(ierr); 487 if (sub_schurs->n_subs) { 488 ierr = PetscFree(sub_schurs->is_subs);CHKERRQ(ierr); 489 ierr = PetscFree4(sub_schurs->is_AEj_B,sub_schurs->S_Ej,sub_schurs->work1,sub_schurs->work2);CHKERRQ(ierr); 490 ierr = PetscFree2(sub_schurs->index_sequential,sub_schurs->index_parallel);CHKERRQ(ierr); 491 ierr = PetscFree(sub_schurs->auxglobal_sequential);CHKERRQ(ierr); 492 ierr = PetscFree(sub_schurs->auxglobal_parallel);CHKERRQ(ierr); 493 } 494 sub_schurs->n_subs = 0; 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "PCBDDCAdjGetNextLayer_Private" 500 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added) 501 { 502 PetscInt i,j,n; 503 PetscErrorCode ierr; 504 505 PetscFunctionBegin; 506 n = 0; 507 for (i=-n_prev;i<0;i++) { 508 PetscInt start_dof = queue_tip[i]; 509 for (j=xadj[start_dof];j<xadj[start_dof+1];j++) { 510 PetscInt dof = adjncy[j]; 511 if (!PetscBTLookup(touched,dof)) { 512 ierr = PetscBTSet(touched,dof);CHKERRQ(ierr); 513 queue_tip[n] = dof; 514 n++; 515 } 516 } 517 } 518 *n_added = n; 519 PetscFunctionReturn(0); 520 } 521 522 #undef __FUNCT__ 523 #define __FUNCT__ "PCBDDCSubSchursSetUp" 524 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat S, IS is_A_I, IS is_A_B, PetscInt ncc, IS is_cc[], PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers) 525 { 526 Mat A_II,A_IB,A_BI,A_BB; 527 Mat AE_II,*AE_IE,*AE_EI,*AE_EE; 528 IS is_I,*is_subset_B; 529 ISLocalToGlobalMapping BtoNmap; 530 PetscInt i; 531 PetscBool is_sorted; 532 PetscErrorCode ierr; 533 534 PetscFunctionBegin; 535 ierr = ISSorted(is_A_I,&is_sorted);CHKERRQ(ierr); 536 if (!is_sorted) { 537 SETERRQ(PetscObjectComm((PetscObject)is_A_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted"); 538 } 539 ierr = ISSorted(is_A_B,&is_sorted);CHKERRQ(ierr); 540 if (!is_sorted) { 541 SETERRQ(PetscObjectComm((PetscObject)is_A_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted"); 542 } 543 544 /* get Schur complement matrices */ 545 ierr = MatSchurComplementGetSubMatrices(S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr); 546 547 /* allocate space for schur complements */ 548 sub_schurs->n_subs = ncc; 549 ierr = PetscMalloc4(sub_schurs->n_subs,&sub_schurs->is_AEj_B, 550 sub_schurs->n_subs,&sub_schurs->S_Ej, 551 sub_schurs->n_subs,&sub_schurs->work1, 552 sub_schurs->n_subs,&sub_schurs->work2);CHKERRQ(ierr); 553 ierr = PetscMalloc4(ncc,&is_subset_B,ncc,&AE_IE,ncc,&AE_EI,ncc,&AE_EE);CHKERRQ(ierr); 554 555 /* maps */ 556 if (sub_schurs->n_subs && nlayers >= 0 && xadj != NULL && adjncy != NULL) { /* Interior problems can be different from the original one */ 557 ISLocalToGlobalMapping ItoNmap; 558 PetscBT touched; 559 const PetscInt* idx_B; 560 PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering; 561 562 /* get sizes */ 563 ierr = ISGetLocalSize(is_A_I,&n_I);CHKERRQ(ierr); 564 ierr = ISGetLocalSize(is_A_B,&n_B);CHKERRQ(ierr); 565 566 ierr = ISLocalToGlobalMappingCreateIS(is_A_I,&ItoNmap);CHKERRQ(ierr); 567 ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr); 568 ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr); 569 ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr); 570 571 /* all boundary dofs must be skipped when adding layers */ 572 ierr = ISGetIndices(is_A_B,&idx_B);CHKERRQ(ierr); 573 for (j=0;j<n_B;j++) { 574 ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr); 575 } 576 ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr); 577 ierr = ISRestoreIndices(is_A_B,&idx_B);CHKERRQ(ierr); 578 579 /* add next layers of dofs */ 580 n_local_dofs = n_B; 581 n_prev_added = n_B; 582 for (layer=0;layer<nlayers;layer++) { 583 PetscInt n_added; 584 if (n_local_dofs == n_I+n_B) break; 585 if (n_local_dofs > n_I+n_B) { 586 SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B); 587 } 588 ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr); 589 n_prev_added = n_added; 590 n_local_dofs += n_added; 591 if (!n_added) break; 592 } 593 ierr = PetscBTDestroy(&touched);CHKERRQ(ierr); 594 595 /* IS for I dofs in original numbering and in I numbering */ 596 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)ItoNmap),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_I_layer);CHKERRQ(ierr); 597 ierr = PetscFree(local_numbering);CHKERRQ(ierr); 598 ierr = ISSort(sub_schurs->is_I_layer);CHKERRQ(ierr); 599 ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_I_layer,&is_I);CHKERRQ(ierr); 600 ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr); 601 602 /* II block */ 603 ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr); 604 } else { 605 PetscInt n_I; 606 607 /* IS for I dofs in original numbering */ 608 ierr = PetscObjectReference((PetscObject)is_A_I);CHKERRQ(ierr); 609 sub_schurs->is_I_layer = is_A_I; 610 611 /* IS for I dofs in I numbering (strided 1) */ 612 ierr = ISGetSize(is_A_I,&n_I);CHKERRQ(ierr); 613 ierr = ISCreateStride(PetscObjectComm((PetscObject)is_A_I),n_I,0,1,&is_I);CHKERRQ(ierr); 614 615 /* II block is the same */ 616 ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr); 617 AE_II = A_II; 618 } 619 620 /* subsets in original and boundary numbering */ 621 ierr = ISLocalToGlobalMappingCreateIS(is_A_B,&BtoNmap);CHKERRQ(ierr); 622 for (i=0;i<sub_schurs->n_subs;i++) { 623 ierr = ISDuplicate(is_cc[i],&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 624 ierr = ISSort(sub_schurs->is_AEj_B[i]);CHKERRQ(ierr); 625 ierr = ISGlobalToLocalMappingApplyIS(BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[i],&is_subset_B[i]);CHKERRQ(ierr); 626 } 627 ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr); 628 629 /* EE block */ 630 for (i=0;i<sub_schurs->n_subs;i++) { 631 ierr = MatGetSubMatrix(A_BB,is_subset_B[i],is_subset_B[i],MAT_INITIAL_MATRIX,&AE_EE[i]);CHKERRQ(ierr); 632 } 633 /* IE block */ 634 for (i=0;i<sub_schurs->n_subs;i++) { 635 ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B[i],MAT_INITIAL_MATRIX,&AE_IE[i]);CHKERRQ(ierr); 636 } 637 /* EI block */ 638 for (i=0;i<sub_schurs->n_subs;i++) { 639 ierr = MatGetSubMatrix(A_BI,is_subset_B[i],is_I,MAT_INITIAL_MATRIX,&AE_EI[i]);CHKERRQ(ierr); 640 } 641 642 /* setup Schur complements on subset */ 643 for (i=0;i<sub_schurs->n_subs;i++) { 644 ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE[i],AE_EI[i],AE_EE[i],&sub_schurs->S_Ej[i]);CHKERRQ(ierr); 645 ierr = MatCreateVecs(sub_schurs->S_Ej[i],&sub_schurs->work1[i],&sub_schurs->work2[i]);CHKERRQ(ierr); 646 if (AE_II == A_II) { /* we can reuse the same ksp */ 647 KSP ksp; 648 ierr = MatSchurComplementGetKSP(S,&ksp);CHKERRQ(ierr); 649 ierr = MatSchurComplementSetKSP(sub_schurs->S_Ej[i],ksp);CHKERRQ(ierr); 650 } else { /* build new ksp object which inherits ksp and pc types from the original one */ 651 KSP origksp,schurksp; 652 PC origpc,schurpc; 653 KSPType ksp_type; 654 PCType pc_type; 655 PetscInt n_internal; 656 657 ierr = MatSchurComplementGetKSP(S,&origksp);CHKERRQ(ierr); 658 ierr = MatSchurComplementGetKSP(sub_schurs->S_Ej[i],&schurksp);CHKERRQ(ierr); 659 ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr); 660 ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr); 661 ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr); 662 ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr); 663 ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr); 664 ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr); 665 ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr); 666 if (n_internal) { /* UMFPACK gives error with 0 sized problems */ 667 MatSolverPackage solver=NULL; 668 ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr); 669 if (solver) { 670 ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr); 671 } 672 } 673 ierr = KSPSetUp(schurksp);CHKERRQ(ierr); 674 } 675 } 676 /* free */ 677 ierr = ISDestroy(&is_I);CHKERRQ(ierr); 678 ierr = MatDestroy(&AE_II);CHKERRQ(ierr); 679 for (i=0;i<sub_schurs->n_subs;i++) { 680 ierr = MatDestroy(&AE_EE[i]);CHKERRQ(ierr); 681 ierr = MatDestroy(&AE_IE[i]);CHKERRQ(ierr); 682 ierr = MatDestroy(&AE_EI[i]);CHKERRQ(ierr); 683 ierr = ISDestroy(&is_subset_B[i]);CHKERRQ(ierr); 684 } 685 ierr = PetscFree4(is_subset_B,AE_IE,AE_EI,AE_EE);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688