xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision 9087bf029dbfb05f167ec36f9fc026487ff8e033)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
6 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCComputeExplicitSchur"
10 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
11 {
12   Mat            B, C, D, Bd, Cd, AinvBd;
13   KSP            ksp;
14   PC             pc;
15   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
16   PetscReal      fill = 2.0;
17   PetscInt       n_I;
18   PetscMPIInt    size;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);CHKERRQ(ierr);
23   if (size != 1) {
24     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
25   }
26   if (reuse == MAT_REUSE_MATRIX) {
27     PetscBool Sdense;
28 
29     ierr = PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);CHKERRQ(ierr);
30     if (!Sdense) {
31       SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
32     }
33   }
34   ierr = MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);CHKERRQ(ierr);
35   ierr = MatSchurComplementGetKSP(M, &ksp);CHKERRQ(ierr);
36   ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr);
37   ierr = PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);CHKERRQ(ierr);
38   ierr = PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);CHKERRQ(ierr);
39   ierr = PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);CHKERRQ(ierr);
40   ierr = PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);CHKERRQ(ierr);
41   ierr = PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);CHKERRQ(ierr);
42   ierr = MatGetSize(B,&n_I,NULL);CHKERRQ(ierr);
43   if (n_I) {
44     if (!Bdense) {
45       ierr = MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);CHKERRQ(ierr);
46     } else {
47       Bd = B;
48     }
49 
50     if (isLU || isILU || isCHOL) {
51       Mat fact;
52       ierr = KSPSetUp(ksp);CHKERRQ(ierr);
53       ierr = PCFactorGetMatrix(pc, &fact);CHKERRQ(ierr);
54       ierr = MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);CHKERRQ(ierr);
55       ierr = MatMatSolve(fact, Bd, AinvBd);CHKERRQ(ierr);
56     } else {
57       PetscBool ex = PETSC_TRUE;
58 
59       if (ex) {
60         Mat Ainvd;
61 
62         ierr = PCComputeExplicitOperator(pc, &Ainvd);CHKERRQ(ierr);
63         ierr = MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);CHKERRQ(ierr);
64         ierr = MatDestroy(&Ainvd);CHKERRQ(ierr);
65       } else {
66         Vec         sol,rhs;
67         PetscScalar *arrayrhs,*arraysol;
68         PetscInt    i,nrhs,n;
69 
70         ierr = MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);CHKERRQ(ierr);
71         ierr = MatGetSize(Bd,&n,&nrhs);CHKERRQ(ierr);
72         ierr = MatDenseGetArray(Bd,&arrayrhs);CHKERRQ(ierr);
73         ierr = MatDenseGetArray(AinvBd,&arraysol);CHKERRQ(ierr);
74         ierr = KSPGetSolution(ksp,&sol);CHKERRQ(ierr);
75         ierr = KSPGetRhs(ksp,&rhs);CHKERRQ(ierr);
76         for (i=0;i<nrhs;i++) {
77           ierr = VecPlaceArray(rhs,arrayrhs+i*n);CHKERRQ(ierr);
78           ierr = VecPlaceArray(sol,arraysol+i*n);CHKERRQ(ierr);
79           ierr = KSPSolve(ksp,rhs,sol);CHKERRQ(ierr);
80           ierr = VecResetArray(rhs);CHKERRQ(ierr);
81           ierr = VecResetArray(sol);CHKERRQ(ierr);
82         }
83         ierr = MatDenseRestoreArray(Bd,&arrayrhs);CHKERRQ(ierr);
84         ierr = MatDenseRestoreArray(AinvBd,&arrayrhs);CHKERRQ(ierr);
85       }
86     }
87     if (!Bdense & !issym) {
88       ierr = MatDestroy(&Bd);CHKERRQ(ierr);
89     }
90 
91     if (!issym) {
92       if (!Cdense) {
93         ierr = MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);CHKERRQ(ierr);
94       } else {
95         Cd = C;
96       }
97       ierr = MatMatMult(Cd, AinvBd, reuse, fill, S);CHKERRQ(ierr);
98       if (!Cdense) {
99         ierr = MatDestroy(&Cd);CHKERRQ(ierr);
100       }
101     } else {
102       ierr = MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);CHKERRQ(ierr);
103       if (!Bdense) {
104         ierr = MatDestroy(&Bd);CHKERRQ(ierr);
105       }
106     }
107     ierr = MatDestroy(&AinvBd);CHKERRQ(ierr);
108   }
109 
110   if (D) {
111     Mat       Dd;
112     PetscBool Ddense;
113 
114     ierr = PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);CHKERRQ(ierr);
115     if (!Ddense) {
116       ierr = MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);CHKERRQ(ierr);
117     } else {
118       Dd = D;
119     }
120     if (n_I) {
121       ierr = MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
122     } else {
123       if (reuse == MAT_INITIAL_MATRIX) {
124         ierr = MatDuplicate(Dd,MAT_COPY_VALUES,S);CHKERRQ(ierr);
125       } else {
126         ierr = MatCopy(Dd,*S,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
127       }
128     }
129     if (!Ddense) {
130       ierr = MatDestroy(&Dd);CHKERRQ(ierr);
131     }
132   } else {
133     ierr = MatScale(*S,-1.0);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "PCBDDCSubSchursSetUp"
140 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool compute_Stilda, PetscBool invert_Stildas, PetscBool use_edges, PetscBool use_faces)
141 {
142   Mat                    A_II,A_IB,A_BI,A_BB,AE_II;
143   Mat                    S_all,S_all_inv;
144   Mat                    global_schur_subsets,work_mat;
145   Mat                    S_Ej_tilda_all,S_Ej_inv_all;
146   ISLocalToGlobalMapping l2gmap_subsets;
147   IS                     is_I,temp_is;
148   PetscInt               *nnz,*all_local_idx_G,*all_local_idx_N;
149   PetscInt               i,subset_size,max_subset_size;
150   PetscInt               extra,local_size,global_size;
151   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
152   PetscScalar            *Bwork;
153   PetscSubcomm           subcomm;
154   PetscMPIInt            color,rank;
155   MPI_Comm               comm_n;
156   PetscErrorCode         ierr;
157 
158   PetscFunctionBegin;
159   /* preliminary checks */
160   if (!sub_schurs->use_mumps && compute_Stilda) {
161     SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
162   }
163   /* determine if we are dealing with hermitian positive definite problems */
164   sub_schurs->is_hermitian = PETSC_FALSE;
165   sub_schurs->is_posdef = PETSC_FALSE;
166   if (sub_schurs->A) {
167     PetscInt lsize;
168 
169     ierr = MatGetSize(sub_schurs->A,&lsize,NULL);CHKERRQ(ierr);
170     if (lsize) {
171       ierr = MatIsHermitian(sub_schurs->A,0.0,&sub_schurs->is_hermitian);CHKERRQ(ierr);
172       if (sub_schurs->is_hermitian) {
173         PetscScalar val;
174         Vec         vec1,vec2;
175 
176         ierr = MatCreateVecs(sub_schurs->A,&vec1,&vec2);CHKERRQ(ierr);
177         ierr = VecSetRandom(vec1,NULL);
178         ierr = VecCopy(vec1,vec2);CHKERRQ(ierr);
179         ierr = MatMult(sub_schurs->A,vec2,vec1);CHKERRQ(ierr);
180         ierr = VecDot(vec1,vec2,&val);CHKERRQ(ierr);
181         if (PetscRealPart(val) > 0. && PetscImaginaryPart(val) == 0.) sub_schurs->is_posdef = PETSC_TRUE;
182         ierr = VecDestroy(&vec1);CHKERRQ(ierr);
183         ierr = VecDestroy(&vec2);CHKERRQ(ierr);
184       }
185     } else {
186       sub_schurs->is_hermitian = PETSC_TRUE;
187       sub_schurs->is_posdef = PETSC_TRUE;
188     }
189     if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
190       SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"General matrix pencils are not currently supported");
191     }
192   }
193   /* restrict work on active processes */
194   color = 0;
195   if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
196   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);CHKERRQ(ierr);
197   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);CHKERRQ(ierr);
198   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
199   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
200   ierr = PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);CHKERRQ(ierr);
201   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
202   if (!sub_schurs->n_subs) {
203     ierr = PetscCommDestroy(&comm_n);CHKERRQ(ierr);
204     PetscFunctionReturn(0);
205   }
206 
207   /* get Schur complement matrices */
208   if (!sub_schurs->use_mumps) {
209     PetscBool isseqaij;
210     ierr = MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr);
211     ierr = PetscObjectTypeCompare((PetscObject)A_BB,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
212     if (!isseqaij) {
213       ierr = MatConvert(A_BB,MATSEQAIJ,MAT_REUSE_MATRIX,&A_BB);CHKERRQ(ierr);
214       ierr = MatConvert(A_IB,MATSEQAIJ,MAT_REUSE_MATRIX,&A_IB);CHKERRQ(ierr);
215       ierr = MatConvert(A_BI,MATSEQAIJ,MAT_REUSE_MATRIX,&A_BI);CHKERRQ(ierr);
216     }
217   } else {
218     A_II = NULL;
219     A_IB = NULL;
220     A_BI = NULL;
221     A_BB = NULL;
222   }
223   S_all = NULL;
224   S_all_inv = NULL;
225   S_Ej_tilda_all = NULL;
226   S_Ej_inv_all = NULL;
227 
228   /* determine interior problems */
229   ierr = ISDestroy(&sub_schurs->is_I_layer);CHKERRQ(ierr);
230   ierr = ISGetLocalSize(sub_schurs->is_I,&i);CHKERRQ(ierr);
231   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
232     PetscBT                touched;
233     const PetscInt*        idx_B;
234     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
235 
236     if (xadj == NULL || adjncy == NULL) {
237       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
238     }
239     /* get sizes */
240     ierr = ISGetLocalSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
241     ierr = ISGetLocalSize(sub_schurs->is_B,&n_B);CHKERRQ(ierr);
242 
243     ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr);
244     ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr);
245     ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr);
246 
247     /* all boundary dofs must be skipped when adding layers */
248     ierr = ISGetIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
249     for (j=0;j<n_B;j++) {
250       ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr);
251     }
252     ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr);
253     ierr = ISRestoreIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
254 
255     /* add prescribed number of layers of dofs */
256     n_local_dofs = n_B;
257     n_prev_added = n_B;
258     for (layer=0;layer<nlayers;layer++) {
259       PetscInt n_added;
260       if (n_local_dofs == n_I+n_B) break;
261       if (n_local_dofs > n_I+n_B) {
262         SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B);
263       }
264       ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr);
265       n_prev_added = n_added;
266       n_local_dofs += n_added;
267       if (!n_added) break;
268     }
269     ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
270 
271     /* IS for I layer dofs in original numbering */
272     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_I_layer);CHKERRQ(ierr);
273     ierr = PetscFree(local_numbering);CHKERRQ(ierr);
274     ierr = ISSort(sub_schurs->is_I_layer);CHKERRQ(ierr);
275     /* IS for I layer dofs in I numbering */
276     if (!sub_schurs->use_mumps) {
277       ISLocalToGlobalMapping ItoNmap;
278       ierr = ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);CHKERRQ(ierr);
279       ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_I_layer,&is_I);CHKERRQ(ierr);
280       ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr);
281 
282       /* II block */
283       ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr);
284     }
285   } else {
286     PetscInt n_I;
287 
288     /* IS for I dofs in original numbering */
289     ierr = PetscObjectReference((PetscObject)sub_schurs->is_I);CHKERRQ(ierr);
290     sub_schurs->is_I_layer = sub_schurs->is_I;
291 
292     /* IS for I dofs in I numbering (strided 1) */
293     if (!sub_schurs->use_mumps) {
294       ierr = ISGetSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
295       ierr = ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);CHKERRQ(ierr);
296 
297       /* II block is the same */
298       ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr);
299       AE_II = A_II;
300     }
301   }
302 
303   /* Get info on subset sizes and sum of all subsets sizes */
304   max_subset_size = 0;
305   local_size = 0;
306   for (i=0;i<sub_schurs->n_subs;i++) {
307     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
308     max_subset_size = PetscMax(subset_size,max_subset_size);
309     local_size += subset_size;
310   }
311 
312   /* Work arrays for local indices */
313   extra = 0;
314   if (sub_schurs->use_mumps) {
315     ierr = ISGetLocalSize(sub_schurs->is_I_layer,&extra);CHKERRQ(ierr);
316   }
317   ierr = PetscMalloc1(local_size+extra,&all_local_idx_N);CHKERRQ(ierr);
318   if (extra) {
319     const PetscInt *idxs;
320     ierr = ISGetIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr);
321     ierr = PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));CHKERRQ(ierr);
322     ierr = ISRestoreIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr);
323   }
324   ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr);
325 
326   /* Get local indices in local numbering */
327   local_size = 0;
328   for (i=0;i<sub_schurs->n_subs;i++) {
329     PetscInt j;
330     const    PetscInt *idxs;
331 
332     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
333     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
334     /* subset indices in local numbering */
335     ierr = PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
336     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
337     for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
338     local_size += subset_size;
339   }
340 
341   /* allocate extra workspace needed only for GETRI */
342   Bwork = NULL;
343   pivots = NULL;
344   if (sub_schurs->n_subs && !sub_schurs->is_hermitian) {
345     PetscScalar lwork;
346 
347     B_lwork = -1;
348     ierr = PetscBLASIntCast(local_size,&B_N);CHKERRQ(ierr);
349     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
350     PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,Bwork,&B_N,pivots,&lwork,&B_lwork,&B_ierr));
351     ierr = PetscFPTrapPop();CHKERRQ(ierr);
352     if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
353     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
354     ierr = PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);CHKERRQ(ierr);
355   }
356 
357   /* prepare parallel matrices for summing up properly schurs on subsets */
358   ierr = PCBDDCSubsetNumbering(comm_n,sub_schurs->l2gmap,local_size,all_local_idx_N+extra,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr);
359   ierr = ISLocalToGlobalMappingCreate(comm_n,1,local_size,all_local_idx_G,PETSC_COPY_VALUES,&l2gmap_subsets);CHKERRQ(ierr);
360   ierr = MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);CHKERRQ(ierr);
361   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr);
362   ierr = MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);CHKERRQ(ierr);
363   ierr = MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);CHKERRQ(ierr);
364   ierr = MatSetType(global_schur_subsets,MATMPIAIJ);CHKERRQ(ierr);
365 
366   /* subset indices in local boundary numbering */
367   if (!sub_schurs->is_Ej_all) {
368     PetscInt *all_local_idx_B;
369 
370     ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr);
371     ierr = ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);CHKERRQ(ierr);
372     if (subset_size != local_size) {
373       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size);
374     }
375     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);CHKERRQ(ierr);
376   }
377 
378   /* Local matrix of all local Schur on subsets (transposed) */
379   if (!sub_schurs->S_Ej_all) {
380     ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr);
381     ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
382     ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr);
383     ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr);
384   } else {
385     ierr = MatZeroEntries(sub_schurs->S_Ej_all);CHKERRQ(ierr);
386   }
387 
388   /* Compute Schur complements explicitly */
389   ierr = PetscBTMemzero(sub_schurs->n_subs,sub_schurs->computed_Stilda_subs);CHKERRQ(ierr);
390   if (!sub_schurs->use_mumps) {
391     Mat         S_Ej_expl;
392     PetscScalar *work;
393     PetscInt    j,*dummy_idx;
394     PetscBool   Sdense;
395 
396     ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);CHKERRQ(ierr);
397     local_size = 0;
398     for (i=0;i<sub_schurs->n_subs;i++) {
399       IS  is_subset_B;
400       Mat AE_EE,AE_IE,AE_EI,S_Ej;
401 
402       /* subsets in original and boundary numbering */
403       ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);CHKERRQ(ierr);
404       /* EE block */
405       ierr = MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);CHKERRQ(ierr);
406       /* IE block */
407       ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);CHKERRQ(ierr);
408       /* EI block */
409       if (sub_schurs->is_hermitian) {
410         ierr = MatCreateTranspose(AE_IE,&AE_EI);CHKERRQ(ierr);
411       } else {
412         ierr = MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);CHKERRQ(ierr);
413       }
414       ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);CHKERRQ(ierr);
415       ierr = MatDestroy(&AE_EE);CHKERRQ(ierr);
416       ierr = MatDestroy(&AE_IE);CHKERRQ(ierr);
417       ierr = MatDestroy(&AE_EI);CHKERRQ(ierr);
418       if (AE_II == A_II) { /* we can reuse the same ksp */
419         KSP ksp;
420         ierr = MatSchurComplementGetKSP(sub_schurs->S,&ksp);CHKERRQ(ierr);
421         ierr = MatSchurComplementSetKSP(S_Ej,ksp);CHKERRQ(ierr);
422       } else { /* build new ksp object which inherits ksp and pc types from the original one */
423         KSP       origksp,schurksp;
424         PC        origpc,schurpc;
425         KSPType   ksp_type;
426         PetscInt  n_internal;
427         PetscBool ispcnone;
428 
429         ierr = MatSchurComplementGetKSP(sub_schurs->S,&origksp);CHKERRQ(ierr);
430         ierr = MatSchurComplementGetKSP(S_Ej,&schurksp);CHKERRQ(ierr);
431         ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr);
432         ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr);
433         ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr);
434         ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr);
435         ierr = PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);CHKERRQ(ierr);
436         if (!ispcnone) {
437           PCType pc_type;
438           ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr);
439           ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr);
440         } else {
441           ierr = PCSetType(schurpc,PCLU);CHKERRQ(ierr);
442         }
443         ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr);
444         if (n_internal) { /* UMFPACK gives error with 0 sized problems */
445           MatSolverPackage solver=NULL;
446           ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr);
447           if (solver) {
448             ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr);
449           }
450         }
451         ierr = KSPSetUp(schurksp);CHKERRQ(ierr);
452       }
453       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
454       ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);CHKERRQ(ierr);
455       ierr = PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);CHKERRQ(ierr);
456       ierr = PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);CHKERRQ(ierr);
457       if (Sdense) {
458         for (j=0;j<subset_size;j++) {
459           dummy_idx[j]=local_size+j;
460         }
461         ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
462       } else {
463         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
464       }
465       ierr = MatDestroy(&S_Ej);CHKERRQ(ierr);
466       local_size += subset_size;
467     }
468     ierr = PetscFree2(dummy_idx,work);CHKERRQ(ierr);
469     /* free */
470     ierr = ISDestroy(&is_I);CHKERRQ(ierr);
471     ierr = MatDestroy(&AE_II);CHKERRQ(ierr);
472     ierr = PetscFree(all_local_idx_N);CHKERRQ(ierr);
473   } else {
474     Mat         A,F;
475     IS          is_A_all;
476     PetscScalar *work;
477     PetscInt    *idxs_schur,n_I,*dummy_idx;
478 
479     /* get working mat */
480     ierr = ISGetLocalSize(sub_schurs->is_I_layer,&n_I);CHKERRQ(ierr);
481     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);CHKERRQ(ierr);
482     ierr = MatGetSubMatrixUnsorted(sub_schurs->A,is_A_all,is_A_all,&A);CHKERRQ(ierr);
483     ierr = ISDestroy(&is_A_all);CHKERRQ(ierr);
484 
485     if (n_I) {
486       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
487         ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr);
488       } else {
489         ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
490       }
491 
492       /* subsets ordered last */
493       ierr = PetscMalloc1(local_size,&idxs_schur);CHKERRQ(ierr);
494       for (i=0;i<local_size;i++) {
495         idxs_schur[i] = n_I+i+1;
496       }
497 #if defined(PETSC_HAVE_MUMPS)
498       ierr = MatMumpsSetSchurIndices(F,local_size,idxs_schur);CHKERRQ(ierr);
499 #endif
500       ierr = PetscFree(idxs_schur);CHKERRQ(ierr);
501 
502       /* factorization step */
503       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
504         ierr = MatCholeskyFactorSymbolic(F,A,NULL,NULL);CHKERRQ(ierr);
505         ierr = MatCholeskyFactorNumeric(F,A,NULL);CHKERRQ(ierr);
506       } else {
507         ierr = MatLUFactorSymbolic(F,A,NULL,NULL,NULL);CHKERRQ(ierr);
508         ierr = MatLUFactorNumeric(F,A,NULL);CHKERRQ(ierr);
509       }
510 
511       /* get explicit Schur Complement computed during numeric factorization */
512 #if defined(PETSC_HAVE_MUMPS)
513       ierr = MatMumpsGetSchurComplement(F,&S_all);CHKERRQ(ierr);
514 #endif
515       ierr = MatDestroy(&F);CHKERRQ(ierr);
516     } else {
517       ierr = MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);CHKERRQ(ierr);
518     }
519     ierr = MatDestroy(&A);CHKERRQ(ierr);
520     ierr = PetscFree(all_local_idx_N);CHKERRQ(ierr);
521 
522     if (compute_Stilda) { /* TODO PICKUP BETTER NAMES */
523       ierr = MatCreate(PETSC_COMM_SELF,&S_Ej_tilda_all);CHKERRQ(ierr);
524       ierr = MatSetSizes(S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
525       ierr = MatSetType(S_Ej_tilda_all,MATAIJ);CHKERRQ(ierr);
526       ierr = MatSeqAIJSetPreallocation(S_Ej_tilda_all,0,nnz);CHKERRQ(ierr);
527       ierr = MatCreate(PETSC_COMM_SELF,&S_Ej_inv_all);CHKERRQ(ierr);
528       ierr = MatSetSizes(S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
529       ierr = MatSetType(S_Ej_inv_all,MATAIJ);CHKERRQ(ierr);
530       ierr = MatSeqAIJSetPreallocation(S_Ej_inv_all,0,nnz);CHKERRQ(ierr);
531     }
532 
533     /* Get St^-1 */
534     if (compute_Stilda) {
535       PetscScalar *vals;
536       ierr = PetscBLASIntCast(local_size,&B_N);CHKERRQ(ierr);
537       ierr = MatDuplicate(S_all,MAT_COPY_VALUES,&S_all_inv);CHKERRQ(ierr);
538       ierr = MatDenseGetArray(S_all_inv,&vals);CHKERRQ(ierr);
539       if (!sub_schurs->is_hermitian) {
540         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,vals,&B_N,pivots,&B_ierr));
541         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
542         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,vals,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
543         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
544       } else {
545         PetscInt j,k;
546 
547         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,vals,&B_N,&B_ierr));
548         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
549         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,vals,&B_N,&B_ierr));
550         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
551         for (j=0;j<B_N;j++) {
552           for (k=j+1;k<B_N;k++) {
553             vals[k*B_N+j] = vals[j*B_N+k];
554           }
555         }
556       }
557       ierr = MatDenseRestoreArray(S_all_inv,&vals);CHKERRQ(ierr);
558     }
559 
560     /* Work arrays */
561     if (sub_schurs->n_subs == 1) {
562       ierr = PetscMalloc1(max_subset_size,&dummy_idx);CHKERRQ(ierr);
563     } else {
564       ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);CHKERRQ(ierr);
565     }
566 
567     local_size = 0;
568     for (i=0;i<sub_schurs->n_subs;i++) {
569       Mat S_Ej;
570       IS  is_E;
571       PetscInt j;
572 
573       /* get S_E */
574       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
575       if (sub_schurs->n_subs == 1) {
576         ierr = MatDenseGetArray(S_all,&work);CHKERRQ(ierr);
577         S_Ej = NULL;
578         is_E = NULL;
579       } else {
580         ierr = ISCreateStride(PETSC_COMM_SELF,subset_size,local_size,1,&is_E);CHKERRQ(ierr);
581         ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej);CHKERRQ(ierr);
582         ierr = MatGetSubMatrix(S_all,is_E,is_E,MAT_REUSE_MATRIX,&S_Ej);CHKERRQ(ierr);
583       }
584       /* insert S_E values */
585       for (j=0;j<subset_size;j++) {
586         dummy_idx[j]=local_size+j;
587       }
588       ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
589 
590       /* if adaptivity is requested, invert S_E and insert St_E^-1 blocks */
591       if (compute_Stilda && ((PetscBTLookup(sub_schurs->is_edge,i) && use_edges) || (!PetscBTLookup(sub_schurs->is_edge,i) && use_faces))) {
592         /* get S_E^-1 */
593         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
594         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
595         if (!sub_schurs->is_hermitian) {
596           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
597           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
598           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
599           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
600         } else {
601           PetscInt j,k;
602 
603           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
604           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
605           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
606           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
607           for (j=0;j<B_N;j++) {
608             for (k=j+1;k<B_N;k++) {
609               work[k*B_N+j] = work[j*B_N+k];
610             }
611           }
612         }
613         ierr = PetscFPTrapPop();CHKERRQ(ierr);
614         ierr = MatSetValues(S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
615 
616         /* get St_E^-1 */
617         if (sub_schurs->n_subs == 1) {
618           ierr = MatDenseRestoreArray(S_all,&work);CHKERRQ(ierr);
619           ierr = MatDenseGetArray(S_all_inv,&work);CHKERRQ(ierr);
620         } else {
621           ierr = MatGetSubMatrix(S_all_inv,is_E,is_E,MAT_REUSE_MATRIX,&S_Ej);CHKERRQ(ierr);
622         }
623         ierr = MatSetValues(S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
624         if (sub_schurs->n_subs == 1) {
625           ierr = MatDenseRestoreArray(S_all_inv,&work);CHKERRQ(ierr);
626         }
627         ierr = PetscBTSet(sub_schurs->computed_Stilda_subs,i);CHKERRQ(ierr);
628       } else if (sub_schurs->n_subs == 1) {
629         ierr = MatDenseRestoreArray(S_all,&work);CHKERRQ(ierr);
630       }
631       ierr = MatDestroy(&S_Ej);CHKERRQ(ierr);
632       ierr = ISDestroy(&is_E);CHKERRQ(ierr);
633       local_size += subset_size;
634     }
635     if (sub_schurs->n_subs == 1) {
636       ierr = PetscFree(dummy_idx);CHKERRQ(ierr);
637     } else {
638       ierr = PetscFree2(dummy_idx,work);CHKERRQ(ierr);
639     }
640   }
641   ierr = PetscFree(nnz);CHKERRQ(ierr);
642   ierr = MatDestroy(&S_all);CHKERRQ(ierr);
643   ierr = MatDestroy(&S_all_inv);CHKERRQ(ierr);
644   ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
645   ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
646   if (compute_Stilda) {
647     ierr = MatAssemblyBegin(S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
648     ierr = MatAssemblyEnd(S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
649     ierr = MatAssemblyBegin(S_Ej_inv_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
650     ierr = MatAssemblyEnd(S_Ej_inv_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
651   }
652 
653   /* Global matrix of all assembled Schur on subsets */
654   ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr);
655   ierr = MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);CHKERRQ(ierr);
656   ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
657 
658   /* Get local part of (\sum_j S_Ej) */
659   ierr = ISCreateGeneral(comm_n,local_size,all_local_idx_G,PETSC_OWN_POINTER,&temp_is);CHKERRQ(ierr);
660   ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
661   ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
662 
663   /* Compute explicitly (\sum_j S_Ej)^-1 */
664   {
665     Mat         tmpmat;
666     PetscScalar *array;
667     PetscInt    cum;
668 
669     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);CHKERRQ(ierr);
670     cum = 0;
671     for (i=0;i<sub_schurs->n_subs;i++) {
672       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
673       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
674       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
675       if (!sub_schurs->is_hermitian) {
676         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
677         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
678         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
679         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
680       } else {
681         PetscInt j,k;
682 
683         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
684         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
685         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
686         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
687         for (j=0;j<B_N;j++) {
688           for (k=j+1;k<B_N;k++) {
689             array[k*B_N+j+cum] = array[j*B_N+k+cum];
690           }
691         }
692       }
693       ierr = PetscFPTrapPop();CHKERRQ(ierr);
694       cum += subset_size*subset_size;
695     }
696     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);CHKERRQ(ierr);
697     ierr = MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);CHKERRQ(ierr);
698     ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
699     sub_schurs->S_Ej_all = tmpmat;
700   }
701 
702   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) and invert them */
703   if (compute_Stilda) {
704     PetscInt    cum;
705     PetscScalar *array,*array2;
706 
707     ierr = MatISSetLocalMat(work_mat,S_Ej_tilda_all);CHKERRQ(ierr);
708     ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
709     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
710     ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
711     ierr = MatISSetLocalMat(work_mat,S_Ej_inv_all);CHKERRQ(ierr);
712     ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
713     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
714     ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
715     /* invert blocks */
716     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&array);CHKERRQ(ierr);
717     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array2);CHKERRQ(ierr);
718     cum = 0;
719     for (i=0;i<sub_schurs->n_subs;i++) {
720       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
721       if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
722         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
723         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
724         if (!sub_schurs->is_hermitian) {
725           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
726           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
727           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
728           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
729         } else {
730           PetscInt j,k;
731 
732           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
733           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
734           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
735           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
736           for (j=0;j<B_N;j++) {
737             for (k=j+1;k<B_N;k++) {
738               array[k*B_N+j+cum] = array[j*B_N+k+cum];
739             }
740           }
741         }
742         if (invert_Stildas) { /* Stildas can be singular */
743           if (!sub_schurs->is_hermitian) {
744             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array2+cum,&B_N,pivots,&B_ierr));
745             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
746             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array2+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
747             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
748           } else {
749             PetscInt j,k;
750 
751             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array2+cum,&B_N,&B_ierr));
752             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
753             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array2+cum,&B_N,&B_ierr));
754             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
755             for (j=0;j<B_N;j++) {
756               for (k=j+1;k<B_N;k++) {
757                 array2[k*B_N+j+cum] = array2[j*B_N+k+cum];
758               }
759             }
760           }
761         }
762         ierr = PetscFPTrapPop();CHKERRQ(ierr);
763       }
764       cum += subset_size*subset_size;
765     }
766     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&array);CHKERRQ(ierr);
767     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array2);CHKERRQ(ierr);
768   }
769 
770   /* free workspace */
771   ierr = PetscFree2(Bwork,pivots);CHKERRQ(ierr);
772   ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr);
773   ierr = MatDestroy(&S_Ej_tilda_all);CHKERRQ(ierr);
774   ierr = MatDestroy(&S_Ej_inv_all);CHKERRQ(ierr);
775   ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
776   ierr = ISDestroy(&temp_is);CHKERRQ(ierr);
777   ierr = PetscCommDestroy(&comm_n);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "PCBDDCSubSchursInit"
783 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, Mat A, Mat S, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
784 {
785   IS              *faces,*edges,*all_cc,vertices;
786   PetscInt        i,n_faces,n_edges,n_all_cc;
787   PetscBool       is_sorted;
788   PetscErrorCode  ierr;
789 
790   PetscFunctionBegin;
791   ierr = ISSorted(is_I,&is_sorted);CHKERRQ(ierr);
792   if (!is_sorted) {
793     SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
794   }
795   ierr = ISSorted(is_B,&is_sorted);CHKERRQ(ierr);
796   if (!is_sorted) {
797     SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
798   }
799 
800   /* reset any previous data */
801   ierr = PCBDDCSubSchursReset(sub_schurs);CHKERRQ(ierr);
802 
803   /* get index sets for faces and edges (already sorted by global ordering) */
804   ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);CHKERRQ(ierr);
805   n_all_cc = n_faces+n_edges;
806   ierr = PetscBTCreate(n_all_cc,&sub_schurs->is_edge);CHKERRQ(ierr);
807   ierr = PetscBTCreate(n_all_cc,&sub_schurs->computed_Stilda_subs);CHKERRQ(ierr);
808   ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr);
809   for (i=0;i<n_faces;i++) {
810     all_cc[i] = faces[i];
811   }
812   for (i=0;i<n_edges;i++) {
813     all_cc[n_faces+i] = edges[i];
814     ierr = PetscBTSet(sub_schurs->is_edge,n_faces+i);CHKERRQ(ierr);
815   }
816   ierr = PetscFree(faces);CHKERRQ(ierr);
817   ierr = PetscFree(edges);CHKERRQ(ierr);
818 
819   /* Determine if MUMPS cane be used */
820   sub_schurs->use_mumps = PETSC_FALSE;
821 #if defined(PETSC_HAVE_MUMPS)
822   sub_schurs->use_mumps = (PetscBool)(!!A);
823 #endif
824 
825   /* update info in sub_schurs */
826   if (A) {
827     PetscBool isseqaij;
828 
829     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
830     if (isseqaij) {
831       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
832       sub_schurs->A = A;
833     } else { /* SeqBAIJ matrices does not support symmetry checking, SeqSBAIJ does not support MatPermute */
834       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);CHKERRQ(ierr);
835     }
836   }
837   ierr = PetscObjectReference((PetscObject)S);CHKERRQ(ierr);
838   sub_schurs->S = S;
839   ierr = PetscObjectReference((PetscObject)is_I);CHKERRQ(ierr);
840   sub_schurs->is_I = is_I;
841   ierr = PetscObjectReference((PetscObject)is_B);CHKERRQ(ierr);
842   sub_schurs->is_B = is_B;
843   ierr = PetscObjectReference((PetscObject)graph->l2gmap);CHKERRQ(ierr);
844   sub_schurs->l2gmap = graph->l2gmap;
845   ierr = PetscObjectReference((PetscObject)BtoNmap);CHKERRQ(ierr);
846   sub_schurs->BtoNmap = BtoNmap;
847   sub_schurs->n_subs = n_all_cc;
848   sub_schurs->is_subs = all_cc;
849   if (!sub_schurs->use_mumps) { /* for adaptive selection */
850     for (i=0;i<sub_schurs->n_subs;i++) {
851       ierr = ISSort(sub_schurs->is_subs[i]);CHKERRQ(ierr);
852     }
853   }
854   sub_schurs->is_Ej_com = vertices;
855 
856 
857   /* allocate space for schur complements */
858   sub_schurs->S_Ej_all = NULL;
859   sub_schurs->sum_S_Ej_all = NULL;
860   sub_schurs->sum_S_Ej_inv_all = NULL;
861   sub_schurs->sum_S_Ej_tilda_all = NULL;
862   sub_schurs->is_Ej_all = NULL;
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "PCBDDCSubSchursCreate"
868 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
869 {
870   PCBDDCSubSchurs schurs_ctx;
871   PetscErrorCode  ierr;
872 
873   PetscFunctionBegin;
874   ierr = PetscNew(&schurs_ctx);CHKERRQ(ierr);
875   schurs_ctx->n_subs = 0;
876   *sub_schurs = schurs_ctx;
877   PetscFunctionReturn(0);
878 }
879 
880 #undef __FUNCT__
881 #define __FUNCT__ "PCBDDCSubSchursDestroy"
882 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
883 {
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887   ierr = PCBDDCSubSchursReset(*sub_schurs);CHKERRQ(ierr);
888   ierr = PetscFree(*sub_schurs);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "PCBDDCSubSchursReset"
894 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
895 {
896   PetscInt       i;
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   ierr = MatDestroy(&sub_schurs->A);CHKERRQ(ierr);
901   ierr = MatDestroy(&sub_schurs->S);CHKERRQ(ierr);
902   ierr = ISDestroy(&sub_schurs->is_I);CHKERRQ(ierr);
903   ierr = ISDestroy(&sub_schurs->is_B);CHKERRQ(ierr);
904   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);CHKERRQ(ierr);
905   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);CHKERRQ(ierr);
906   ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
907   ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
908   ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
909   ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
910   ierr = ISDestroy(&sub_schurs->is_Ej_all);CHKERRQ(ierr);
911   ierr = ISDestroy(&sub_schurs->is_Ej_com);CHKERRQ(ierr);
912   ierr = PetscBTDestroy(&sub_schurs->is_edge);CHKERRQ(ierr);
913   ierr = PetscBTDestroy(&sub_schurs->computed_Stilda_subs);CHKERRQ(ierr);
914   for (i=0;i<sub_schurs->n_subs;i++) {
915     ierr = ISDestroy(&sub_schurs->is_subs[i]);CHKERRQ(ierr);
916   }
917   ierr = ISDestroy(&sub_schurs->is_I_layer);CHKERRQ(ierr);
918   if (sub_schurs->n_subs) {
919     ierr = PetscFree(sub_schurs->is_subs);CHKERRQ(ierr);
920   }
921   sub_schurs->n_subs = 0;
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "PCBDDCAdjGetNextLayer_Private"
927 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
928 {
929   PetscInt       i,j,n;
930   PetscErrorCode ierr;
931 
932   PetscFunctionBegin;
933   n = 0;
934   for (i=-n_prev;i<0;i++) {
935     PetscInt start_dof = queue_tip[i];
936     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
937       PetscInt dof = adjncy[j];
938       if (!PetscBTLookup(touched,dof)) {
939         ierr = PetscBTSet(touched,dof);CHKERRQ(ierr);
940         queue_tip[n] = dof;
941         n++;
942       }
943     }
944   }
945   *n_added = n;
946   PetscFunctionReturn(0);
947 }
948