xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision 4bc2dc4bd263f882f69e906548202714ae1fb6b6)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
6 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCComputeExplicitSchur"
10 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
11 {
12   Mat            B, C, D, Bd, Cd, AinvBd;
13   KSP            ksp;
14   PC             pc;
15   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
16   PetscReal      fill = 2.0;
17   PetscInt       n_I;
18   PetscMPIInt    size;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);CHKERRQ(ierr);
23   if (size != 1) {
24     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
25   }
26   if (reuse == MAT_REUSE_MATRIX) {
27     PetscBool Sdense;
28 
29     ierr = PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);CHKERRQ(ierr);
30     if (!Sdense) {
31       SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
32     }
33   }
34   ierr = MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);CHKERRQ(ierr);
35   ierr = MatSchurComplementGetKSP(M, &ksp);CHKERRQ(ierr);
36   ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr);
37   ierr = PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);CHKERRQ(ierr);
38   ierr = PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);CHKERRQ(ierr);
39   ierr = PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);CHKERRQ(ierr);
40   ierr = PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);CHKERRQ(ierr);
41   ierr = PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);CHKERRQ(ierr);
42   ierr = MatGetSize(B,&n_I,NULL);CHKERRQ(ierr);
43   if (n_I) {
44     if (!Bdense) {
45       ierr = MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);CHKERRQ(ierr);
46     } else {
47       Bd = B;
48     }
49 
50     if (isLU || isILU || isCHOL) {
51       Mat fact;
52       ierr = KSPSetUp(ksp);CHKERRQ(ierr);
53       ierr = PCFactorGetMatrix(pc, &fact);CHKERRQ(ierr);
54       ierr = MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);CHKERRQ(ierr);
55       ierr = MatMatSolve(fact, Bd, AinvBd);CHKERRQ(ierr);
56     } else {
57       PetscBool ex = PETSC_TRUE;
58 
59       if (ex) {
60         Mat Ainvd;
61 
62         ierr = PCComputeExplicitOperator(pc, &Ainvd);CHKERRQ(ierr);
63         ierr = MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);CHKERRQ(ierr);
64         ierr = MatDestroy(&Ainvd);CHKERRQ(ierr);
65       } else {
66         Vec         sol,rhs;
67         PetscScalar *arrayrhs,*arraysol;
68         PetscInt    i,nrhs,n;
69 
70         ierr = MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);CHKERRQ(ierr);
71         ierr = MatGetSize(Bd,&n,&nrhs);CHKERRQ(ierr);
72         ierr = MatDenseGetArray(Bd,&arrayrhs);CHKERRQ(ierr);
73         ierr = MatDenseGetArray(AinvBd,&arraysol);CHKERRQ(ierr);
74         ierr = KSPGetSolution(ksp,&sol);CHKERRQ(ierr);
75         ierr = KSPGetRhs(ksp,&rhs);CHKERRQ(ierr);
76         for (i=0;i<nrhs;i++) {
77           ierr = VecPlaceArray(rhs,arrayrhs+i*n);CHKERRQ(ierr);
78           ierr = VecPlaceArray(sol,arraysol+i*n);CHKERRQ(ierr);
79           ierr = KSPSolve(ksp,rhs,sol);CHKERRQ(ierr);
80           ierr = VecResetArray(rhs);CHKERRQ(ierr);
81           ierr = VecResetArray(sol);CHKERRQ(ierr);
82         }
83         ierr = MatDenseRestoreArray(Bd,&arrayrhs);CHKERRQ(ierr);
84         ierr = MatDenseRestoreArray(AinvBd,&arrayrhs);CHKERRQ(ierr);
85       }
86     }
87     if (!Bdense & !issym) {
88       ierr = MatDestroy(&Bd);CHKERRQ(ierr);
89     }
90 
91     if (!issym) {
92       if (!Cdense) {
93         ierr = MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);CHKERRQ(ierr);
94       } else {
95         Cd = C;
96       }
97       ierr = MatMatMult(Cd, AinvBd, reuse, fill, S);CHKERRQ(ierr);
98       if (!Cdense) {
99         ierr = MatDestroy(&Cd);CHKERRQ(ierr);
100       }
101     } else {
102       ierr = MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);CHKERRQ(ierr);
103       if (!Bdense) {
104         ierr = MatDestroy(&Bd);CHKERRQ(ierr);
105       }
106     }
107     ierr = MatDestroy(&AinvBd);CHKERRQ(ierr);
108   }
109 
110   if (D) {
111     Mat       Dd;
112     PetscBool Ddense;
113 
114     ierr = PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);CHKERRQ(ierr);
115     if (!Ddense) {
116       ierr = MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);CHKERRQ(ierr);
117     } else {
118       Dd = D;
119     }
120     if (n_I) {
121       ierr = MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
122     } else {
123       if (reuse == MAT_INITIAL_MATRIX) {
124         ierr = MatDuplicate(Dd,MAT_COPY_VALUES,S);CHKERRQ(ierr);
125       } else {
126         ierr = MatCopy(Dd,*S,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
127       }
128     }
129     if (!Ddense) {
130       ierr = MatDestroy(&Dd);CHKERRQ(ierr);
131     }
132   } else {
133     ierr = MatScale(*S,-1.0);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "PCBDDCSubSchursSetUp"
140 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool faster_deluxe, PetscBool compute_Stilda, PetscBool invert_Stildas, PetscBool use_edges, PetscBool use_faces)
141 {
142   Mat                    A_II,A_IB,A_BI,A_BB,AE_II;
143   Mat                    S_all,S_all_inv;
144   Mat                    global_schur_subsets,work_mat;
145   Mat                    S_Ej_tilda_all,S_Ej_inv_all;
146   ISLocalToGlobalMapping l2gmap_subsets;
147   IS                     is_I,temp_is;
148   PetscInt               *nnz,*all_local_idx_G,*all_local_idx_N;
149   PetscInt               i,subset_size,max_subset_size;
150   PetscInt               extra,local_size,global_size;
151   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
152   PetscScalar            *Bwork;
153   PetscSubcomm           subcomm;
154   PetscMPIInt            color,rank;
155   MPI_Comm               comm_n;
156   PetscErrorCode         ierr;
157 
158   PetscFunctionBegin;
159   /* preliminary checks */
160   if (!sub_schurs->use_mumps && compute_Stilda) {
161     SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
162   }
163   /* determine if we are dealing with hermitian positive definite problems */
164   sub_schurs->is_hermitian = PETSC_FALSE;
165   sub_schurs->is_posdef = PETSC_FALSE;
166   if (sub_schurs->A) {
167     PetscInt lsize;
168 
169     ierr = MatGetSize(sub_schurs->A,&lsize,NULL);CHKERRQ(ierr);
170     if (lsize) {
171       ierr = MatIsHermitian(sub_schurs->A,0.0,&sub_schurs->is_hermitian);CHKERRQ(ierr);
172       if (sub_schurs->is_hermitian) {
173         PetscScalar val;
174         Vec         vec1,vec2;
175 
176         ierr = MatCreateVecs(sub_schurs->A,&vec1,&vec2);CHKERRQ(ierr);
177         ierr = VecSetRandom(vec1,NULL);
178         ierr = VecCopy(vec1,vec2);CHKERRQ(ierr);
179         ierr = MatMult(sub_schurs->A,vec2,vec1);CHKERRQ(ierr);
180         ierr = VecDot(vec1,vec2,&val);CHKERRQ(ierr);
181         if (PetscRealPart(val) > 0. && PetscImaginaryPart(val) == 0.) sub_schurs->is_posdef = PETSC_TRUE;
182         ierr = VecDestroy(&vec1);CHKERRQ(ierr);
183         ierr = VecDestroy(&vec2);CHKERRQ(ierr);
184       }
185     } else {
186       sub_schurs->is_hermitian = PETSC_TRUE;
187       sub_schurs->is_posdef = PETSC_TRUE;
188     }
189     if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
190       SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"General matrix pencils are not currently supported");
191     }
192   }
193   /* restrict work on active processes */
194   color = 0;
195   if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
196   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);CHKERRQ(ierr);
197   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);CHKERRQ(ierr);
198   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
199   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
200   ierr = PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);CHKERRQ(ierr);
201   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
202   if (!sub_schurs->n_subs) {
203     ierr = PetscCommDestroy(&comm_n);CHKERRQ(ierr);
204     PetscFunctionReturn(0);
205   }
206 
207   /* get Schur complement matrices */
208   if (!sub_schurs->use_mumps) {
209     PetscBool isseqaij;
210     ierr = MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr);
211     ierr = PetscObjectTypeCompare((PetscObject)A_BB,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
212     if (!isseqaij) {
213       ierr = MatConvert(A_BB,MATSEQAIJ,MAT_REUSE_MATRIX,&A_BB);CHKERRQ(ierr);
214       ierr = MatConvert(A_IB,MATSEQAIJ,MAT_REUSE_MATRIX,&A_IB);CHKERRQ(ierr);
215       ierr = MatConvert(A_BI,MATSEQAIJ,MAT_REUSE_MATRIX,&A_BI);CHKERRQ(ierr);
216     }
217   } else {
218     A_II = NULL;
219     A_IB = NULL;
220     A_BI = NULL;
221     A_BB = NULL;
222   }
223   S_all = NULL;
224   S_all_inv = NULL;
225   S_Ej_tilda_all = NULL;
226   S_Ej_inv_all = NULL;
227 
228   /* determine interior problems */
229   ierr = ISDestroy(&sub_schurs->is_I_layer);CHKERRQ(ierr);
230   ierr = ISGetLocalSize(sub_schurs->is_I,&i);CHKERRQ(ierr);
231   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
232     PetscBT                touched;
233     const PetscInt*        idx_B;
234     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
235 
236     if (xadj == NULL || adjncy == NULL) {
237       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
238     }
239     /* get sizes */
240     ierr = ISGetLocalSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
241     ierr = ISGetLocalSize(sub_schurs->is_B,&n_B);CHKERRQ(ierr);
242 
243     ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr);
244     ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr);
245     ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr);
246 
247     /* all boundary dofs must be skipped when adding layers */
248     ierr = ISGetIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
249     for (j=0;j<n_B;j++) {
250       ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr);
251     }
252     ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr);
253     ierr = ISRestoreIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
254 
255     /* add prescribed number of layers of dofs */
256     n_local_dofs = n_B;
257     n_prev_added = n_B;
258     for (layer=0;layer<nlayers;layer++) {
259       PetscInt n_added;
260       if (n_local_dofs == n_I+n_B) break;
261       if (n_local_dofs > n_I+n_B) {
262         SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B);
263       }
264       ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr);
265       n_prev_added = n_added;
266       n_local_dofs += n_added;
267       if (!n_added) break;
268     }
269     ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
270 
271     /* IS for I layer dofs in original numbering */
272     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_I_layer);CHKERRQ(ierr);
273     ierr = PetscFree(local_numbering);CHKERRQ(ierr);
274     ierr = ISSort(sub_schurs->is_I_layer);CHKERRQ(ierr);
275     /* IS for I layer dofs in I numbering */
276     if (!sub_schurs->use_mumps) {
277       ISLocalToGlobalMapping ItoNmap;
278       ierr = ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);CHKERRQ(ierr);
279       ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_I_layer,&is_I);CHKERRQ(ierr);
280       ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr);
281 
282       /* II block */
283       ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr);
284     }
285   } else {
286     PetscInt n_I;
287 
288     /* IS for I dofs in original numbering */
289     ierr = PetscObjectReference((PetscObject)sub_schurs->is_I);CHKERRQ(ierr);
290     sub_schurs->is_I_layer = sub_schurs->is_I;
291 
292     /* IS for I dofs in I numbering (strided 1) */
293     if (!sub_schurs->use_mumps) {
294       ierr = ISGetSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
295       ierr = ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);CHKERRQ(ierr);
296 
297       /* II block is the same */
298       ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr);
299       AE_II = A_II;
300     }
301   }
302 
303   /* Get info on subset sizes and sum of all subsets sizes */
304   max_subset_size = 0;
305   local_size = 0;
306   for (i=0;i<sub_schurs->n_subs;i++) {
307     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
308     max_subset_size = PetscMax(subset_size,max_subset_size);
309     local_size += subset_size;
310   }
311 
312   /* Work arrays for local indices */
313   extra = 0;
314   if (sub_schurs->use_mumps) {
315     ierr = ISGetLocalSize(sub_schurs->is_I_layer,&extra);CHKERRQ(ierr);
316   }
317   ierr = PetscMalloc1(local_size+extra,&all_local_idx_N);CHKERRQ(ierr);
318   if (extra) {
319     const PetscInt *idxs;
320     ierr = ISGetIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr);
321     ierr = PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));CHKERRQ(ierr);
322     ierr = ISRestoreIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr);
323   }
324   ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr);
325 
326   /* Get local indices in local numbering */
327   local_size = 0;
328   for (i=0;i<sub_schurs->n_subs;i++) {
329     PetscInt j;
330     const    PetscInt *idxs;
331 
332     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
333     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
334     /* subset indices in local numbering */
335     ierr = PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
336     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
337     for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
338     local_size += subset_size;
339   }
340 
341   /* allocate extra workspace needed only for GETRI */
342   Bwork = NULL;
343   pivots = NULL;
344   if (sub_schurs->n_subs && !sub_schurs->is_hermitian) {
345     PetscScalar lwork;
346 
347     B_lwork = -1;
348     ierr = PetscBLASIntCast(local_size,&B_N);CHKERRQ(ierr);
349     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
350     PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,Bwork,&B_N,pivots,&lwork,&B_lwork,&B_ierr));
351     ierr = PetscFPTrapPop();CHKERRQ(ierr);
352     if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
353     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
354     ierr = PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);CHKERRQ(ierr);
355   }
356 
357   /* prepare parallel matrices for summing up properly schurs on subsets */
358   ierr = PCBDDCSubsetNumbering(comm_n,sub_schurs->l2gmap,local_size,all_local_idx_N+extra,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr);
359   ierr = ISLocalToGlobalMappingCreate(comm_n,1,local_size,all_local_idx_G,PETSC_COPY_VALUES,&l2gmap_subsets);CHKERRQ(ierr);
360   ierr = MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);CHKERRQ(ierr);
361   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr);
362   ierr = MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);CHKERRQ(ierr);
363   ierr = MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);CHKERRQ(ierr);
364   ierr = MatSetType(global_schur_subsets,MATMPIAIJ);CHKERRQ(ierr);
365 
366   /* subset indices in local boundary numbering */
367   if (!sub_schurs->is_Ej_all) {
368     PetscInt *all_local_idx_B;
369 
370     ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr);
371     ierr = ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);CHKERRQ(ierr);
372     if (subset_size != local_size) {
373       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size);
374     }
375     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);CHKERRQ(ierr);
376   }
377 
378   /* Local matrix of all local Schur on subsets (transposed) */
379   if (!sub_schurs->S_Ej_all) {
380     ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr);
381     ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
382     ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr);
383     ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr);
384   } else {
385     ierr = MatZeroEntries(sub_schurs->S_Ej_all);CHKERRQ(ierr);
386   }
387 
388   /* Compute Schur complements explicitly */
389   ierr = PetscBTMemzero(sub_schurs->n_subs,sub_schurs->computed_Stilda_subs);CHKERRQ(ierr);
390   if (!sub_schurs->use_mumps) {
391     Mat         S_Ej_expl;
392     PetscScalar *work;
393     PetscInt    j,*dummy_idx;
394     PetscBool   Sdense;
395 
396     ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);CHKERRQ(ierr);
397     local_size = 0;
398     for (i=0;i<sub_schurs->n_subs;i++) {
399       IS  is_subset_B;
400       Mat AE_EE,AE_IE,AE_EI,S_Ej;
401 
402       /* subsets in original and boundary numbering */
403       ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);CHKERRQ(ierr);
404       /* EE block */
405       ierr = MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);CHKERRQ(ierr);
406       /* IE block */
407       ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);CHKERRQ(ierr);
408       /* EI block */
409       if (sub_schurs->is_hermitian) {
410         ierr = MatCreateTranspose(AE_IE,&AE_EI);CHKERRQ(ierr);
411       } else {
412         ierr = MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);CHKERRQ(ierr);
413       }
414       ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);CHKERRQ(ierr);
415       ierr = MatDestroy(&AE_EE);CHKERRQ(ierr);
416       ierr = MatDestroy(&AE_IE);CHKERRQ(ierr);
417       ierr = MatDestroy(&AE_EI);CHKERRQ(ierr);
418       if (AE_II == A_II) { /* we can reuse the same ksp */
419         KSP ksp;
420         ierr = MatSchurComplementGetKSP(sub_schurs->S,&ksp);CHKERRQ(ierr);
421         ierr = MatSchurComplementSetKSP(S_Ej,ksp);CHKERRQ(ierr);
422       } else { /* build new ksp object which inherits ksp and pc types from the original one */
423         KSP       origksp,schurksp;
424         PC        origpc,schurpc;
425         KSPType   ksp_type;
426         PetscInt  n_internal;
427         PetscBool ispcnone;
428 
429         ierr = MatSchurComplementGetKSP(sub_schurs->S,&origksp);CHKERRQ(ierr);
430         ierr = MatSchurComplementGetKSP(S_Ej,&schurksp);CHKERRQ(ierr);
431         ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr);
432         ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr);
433         ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr);
434         ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr);
435         ierr = PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);CHKERRQ(ierr);
436         if (!ispcnone) {
437           PCType pc_type;
438           ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr);
439           ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr);
440         } else {
441           ierr = PCSetType(schurpc,PCLU);CHKERRQ(ierr);
442         }
443         ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr);
444         if (n_internal) { /* UMFPACK gives error with 0 sized problems */
445           MatSolverPackage solver=NULL;
446           ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr);
447           if (solver) {
448             ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr);
449           }
450         }
451         ierr = KSPSetUp(schurksp);CHKERRQ(ierr);
452       }
453       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
454       ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);CHKERRQ(ierr);
455       ierr = PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);CHKERRQ(ierr);
456       ierr = PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);CHKERRQ(ierr);
457       if (Sdense) {
458         for (j=0;j<subset_size;j++) {
459           dummy_idx[j]=local_size+j;
460         }
461         ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
462       } else {
463         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
464       }
465       ierr = MatDestroy(&S_Ej);CHKERRQ(ierr);
466       local_size += subset_size;
467     }
468     ierr = PetscFree2(dummy_idx,work);CHKERRQ(ierr);
469     /* free */
470     ierr = ISDestroy(&is_I);CHKERRQ(ierr);
471     ierr = MatDestroy(&AE_II);CHKERRQ(ierr);
472     ierr = PetscFree(all_local_idx_N);CHKERRQ(ierr);
473   } else {
474     Mat         A,F;
475     IS          is_A_all;
476     PetscScalar *work;
477     PetscInt    *idxs_schur,n_I,*dummy_idx;
478 
479     /* get working mat */
480     ierr = ISGetLocalSize(sub_schurs->is_I_layer,&n_I);CHKERRQ(ierr);
481     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);CHKERRQ(ierr);
482     ierr = MatGetSubMatrixUnsorted(sub_schurs->A,is_A_all,is_A_all,&A);CHKERRQ(ierr);
483     ierr = ISDestroy(&is_A_all);CHKERRQ(ierr);
484 
485     if (n_I) {
486       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
487         ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr);
488       } else {
489         ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
490       }
491 
492       /* subsets ordered last */
493       ierr = PetscMalloc1(local_size,&idxs_schur);CHKERRQ(ierr);
494       for (i=0;i<local_size;i++) {
495         idxs_schur[i] = n_I+i+1;
496       }
497 #if defined(PETSC_HAVE_MUMPS)
498       ierr = MatMumpsSetSchurIndices(F,local_size,idxs_schur);CHKERRQ(ierr);
499 #endif
500       ierr = PetscFree(idxs_schur);CHKERRQ(ierr);
501 
502       /* factorization step */
503       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
504         ierr = MatCholeskyFactorSymbolic(F,A,NULL,NULL);CHKERRQ(ierr);
505         ierr = MatCholeskyFactorNumeric(F,A,NULL);CHKERRQ(ierr);
506       } else {
507         ierr = MatLUFactorSymbolic(F,A,NULL,NULL,NULL);CHKERRQ(ierr);
508         ierr = MatLUFactorNumeric(F,A,NULL);CHKERRQ(ierr);
509       }
510 
511       /* get explicit Schur Complement computed during numeric factorization */
512 #if defined(PETSC_HAVE_MUMPS)
513       ierr = MatMumpsGetSchurComplement(F,&S_all);CHKERRQ(ierr);
514 #endif
515       ierr = MatDestroy(&F);CHKERRQ(ierr);
516     } else {
517       ierr = MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);CHKERRQ(ierr);
518     }
519     ierr = MatDestroy(&A);CHKERRQ(ierr);
520     ierr = PetscFree(all_local_idx_N);CHKERRQ(ierr);
521 
522     if (compute_Stilda) { /* TODO PICKUP BETTER NAMES */
523       ierr = MatCreate(PETSC_COMM_SELF,&S_Ej_tilda_all);CHKERRQ(ierr);
524       ierr = MatSetSizes(S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
525       ierr = MatSetType(S_Ej_tilda_all,MATAIJ);CHKERRQ(ierr);
526       ierr = MatSeqAIJSetPreallocation(S_Ej_tilda_all,0,nnz);CHKERRQ(ierr);
527       ierr = MatCreate(PETSC_COMM_SELF,&S_Ej_inv_all);CHKERRQ(ierr);
528       ierr = MatSetSizes(S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
529       ierr = MatSetType(S_Ej_inv_all,MATAIJ);CHKERRQ(ierr);
530       ierr = MatSeqAIJSetPreallocation(S_Ej_inv_all,0,nnz);CHKERRQ(ierr);
531     }
532 
533     /* Get St^-1 */
534     if (compute_Stilda) {
535       PetscScalar *vals;
536       ierr = PetscBLASIntCast(local_size,&B_N);CHKERRQ(ierr);
537       ierr = MatDuplicate(S_all,MAT_COPY_VALUES,&S_all_inv);CHKERRQ(ierr);
538       ierr = MatDenseGetArray(S_all_inv,&vals);CHKERRQ(ierr);
539       if (!sub_schurs->is_hermitian) {
540         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,vals,&B_N,pivots,&B_ierr));
541         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
542         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,vals,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
543         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
544       } else {
545         PetscInt j,k;
546 
547         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,vals,&B_N,&B_ierr));
548         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
549         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,vals,&B_N,&B_ierr));
550         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
551         for (j=0;j<B_N;j++) {
552           for (k=j+1;k<B_N;k++) {
553             vals[k*B_N+j] = vals[j*B_N+k];
554           }
555         }
556       }
557       ierr = MatDenseRestoreArray(S_all_inv,&vals);CHKERRQ(ierr);
558     }
559 
560     /* Work arrays */
561     if (sub_schurs->n_subs == 1) {
562       ierr = PetscMalloc1(max_subset_size,&dummy_idx);CHKERRQ(ierr);
563     } else {
564       ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);CHKERRQ(ierr);
565     }
566 
567     local_size = 0;
568     for (i=0;i<sub_schurs->n_subs;i++) {
569       Mat S_Ej;
570       IS  is_E;
571       PetscInt j;
572 
573       /* get S_E */
574       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
575       if (sub_schurs->n_subs == 1) {
576         ierr = MatDenseGetArray(S_all,&work);CHKERRQ(ierr);
577         S_Ej = NULL;
578         is_E = NULL;
579       } else {
580         ierr = ISCreateStride(PETSC_COMM_SELF,subset_size,local_size,1,&is_E);CHKERRQ(ierr);
581         ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej);CHKERRQ(ierr);
582         ierr = MatGetSubMatrix(S_all,is_E,is_E,MAT_REUSE_MATRIX,&S_Ej);CHKERRQ(ierr);
583       }
584       /* insert S_E values */
585       for (j=0;j<subset_size;j++) {
586         dummy_idx[j]=local_size+j;
587       }
588       ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
589 
590       /* if adaptivity is requested, invert S_E and insert St_E^-1 blocks */
591       if (compute_Stilda && ((PetscBTLookup(sub_schurs->is_edge,i) && use_edges) || (!PetscBTLookup(sub_schurs->is_edge,i) && use_faces))) {
592         /* get S_E^-1 */
593         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
594         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
595         if (!sub_schurs->is_hermitian) {
596           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
597           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
598           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
599           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
600         } else {
601           PetscInt j,k;
602 
603           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
604           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
605           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
606           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
607           for (j=0;j<B_N;j++) {
608             for (k=j+1;k<B_N;k++) {
609               work[k*B_N+j] = work[j*B_N+k];
610             }
611           }
612         }
613         ierr = PetscFPTrapPop();CHKERRQ(ierr);
614         ierr = MatSetValues(S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
615 
616         /* get St_E^-1 */
617         if (sub_schurs->n_subs == 1) {
618           ierr = MatDenseRestoreArray(S_all,&work);CHKERRQ(ierr);
619           ierr = MatDenseGetArray(S_all_inv,&work);CHKERRQ(ierr);
620         } else {
621           ierr = MatGetSubMatrix(S_all_inv,is_E,is_E,MAT_REUSE_MATRIX,&S_Ej);CHKERRQ(ierr);
622         }
623         ierr = MatSetValues(S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
624         if (sub_schurs->n_subs == 1) {
625           ierr = MatDenseRestoreArray(S_all_inv,&work);CHKERRQ(ierr);
626         }
627         ierr = PetscBTSet(sub_schurs->computed_Stilda_subs,i);CHKERRQ(ierr);
628       } else if (sub_schurs->n_subs == 1) {
629         ierr = MatDenseRestoreArray(S_all,&work);CHKERRQ(ierr);
630       }
631       ierr = MatDestroy(&S_Ej);CHKERRQ(ierr);
632       ierr = ISDestroy(&is_E);CHKERRQ(ierr);
633       local_size += subset_size;
634     }
635     if (sub_schurs->n_subs == 1) {
636       ierr = PetscFree(dummy_idx);CHKERRQ(ierr);
637     } else {
638       ierr = PetscFree2(dummy_idx,work);CHKERRQ(ierr);
639     }
640   }
641   ierr = PetscFree(nnz);CHKERRQ(ierr);
642   ierr = MatDestroy(&S_all);CHKERRQ(ierr);
643   ierr = MatDestroy(&S_all_inv);CHKERRQ(ierr);
644   ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
645   ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
646   if (compute_Stilda) {
647     ierr = MatAssemblyBegin(S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
648     ierr = MatAssemblyEnd(S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
649     ierr = MatAssemblyBegin(S_Ej_inv_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
650     ierr = MatAssemblyEnd(S_Ej_inv_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
651   }
652 
653   /* Global matrix of all assembled Schur on subsets */
654   ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr);
655   ierr = MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);CHKERRQ(ierr);
656   ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
657 
658   /* Get local part of (\sum_j S_Ej) */
659   ierr = ISCreateGeneral(comm_n,local_size,all_local_idx_G,PETSC_OWN_POINTER,&temp_is);CHKERRQ(ierr);
660   ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
661   ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
662 
663   /* Compute explicitly (\sum_j S_Ej)^-1 (faster scaling during PCApply, needs extra work when doing setup) */
664   if (faster_deluxe) {
665     Mat         tmpmat;
666     PetscScalar *array;
667     PetscInt    cum;
668 
669     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);CHKERRQ(ierr);
670     cum = 0;
671     for (i=0;i<sub_schurs->n_subs;i++) {
672       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
673       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
674       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
675       if (!sub_schurs->is_hermitian) {
676         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
677         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
678         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
679         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
680       } else {
681         PetscInt j,k;
682 
683         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
684         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
685         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
686         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
687         for (j=0;j<B_N;j++) {
688           for (k=j+1;k<B_N;k++) {
689             array[k*B_N+j+cum] = array[j*B_N+k+cum];
690           }
691         }
692       }
693       ierr = PetscFPTrapPop();CHKERRQ(ierr);
694       cum += subset_size*subset_size;
695     }
696     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);CHKERRQ(ierr);
697     ierr = MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);CHKERRQ(ierr);
698     ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
699     ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
700     sub_schurs->S_Ej_all = tmpmat;
701   }
702 
703   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) and invert them */
704   if (compute_Stilda) {
705     PetscInt    cum;
706     PetscScalar *array,*array2;
707 
708     ierr = MatISSetLocalMat(work_mat,S_Ej_tilda_all);CHKERRQ(ierr);
709     ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
710     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
711     ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
712     ierr = MatISSetLocalMat(work_mat,S_Ej_inv_all);CHKERRQ(ierr);
713     ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
714     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
715     ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
716     /* invert blocks */
717     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&array);CHKERRQ(ierr);
718     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array2);CHKERRQ(ierr);
719     cum = 0;
720     for (i=0;i<sub_schurs->n_subs;i++) {
721       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
722       if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
723         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
724         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
725         if (!sub_schurs->is_hermitian) {
726           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
727           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
728           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
729           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
730         } else {
731           PetscInt j,k;
732 
733           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
734           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
735           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
736           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
737           for (j=0;j<B_N;j++) {
738             for (k=j+1;k<B_N;k++) {
739               array[k*B_N+j+cum] = array[j*B_N+k+cum];
740             }
741           }
742         }
743         if (invert_Stildas) { /* Stildas can be singular */
744           if (!sub_schurs->is_hermitian) {
745             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array2+cum,&B_N,pivots,&B_ierr));
746             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
747             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array2+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
748             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
749           } else {
750             PetscInt j,k;
751 
752             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array2+cum,&B_N,&B_ierr));
753             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
754             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array2+cum,&B_N,&B_ierr));
755             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
756             for (j=0;j<B_N;j++) {
757               for (k=j+1;k<B_N;k++) {
758                 array2[k*B_N+j+cum] = array2[j*B_N+k+cum];
759               }
760             }
761           }
762         }
763         ierr = PetscFPTrapPop();CHKERRQ(ierr);
764       }
765       cum += subset_size*subset_size;
766     }
767     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&array);CHKERRQ(ierr);
768     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array2);CHKERRQ(ierr);
769   }
770 
771   /* free workspace */
772   ierr = PetscFree2(Bwork,pivots);CHKERRQ(ierr);
773   ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr);
774   ierr = MatDestroy(&S_Ej_tilda_all);CHKERRQ(ierr);
775   ierr = MatDestroy(&S_Ej_inv_all);CHKERRQ(ierr);
776   ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
777   ierr = ISDestroy(&temp_is);CHKERRQ(ierr);
778   ierr = PetscCommDestroy(&comm_n);CHKERRQ(ierr);
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "PCBDDCSubSchursInit"
784 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, Mat A, Mat S, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
785 {
786   IS              *faces,*edges,*all_cc,vertices;
787   PetscInt        i,n_faces,n_edges,n_all_cc;
788   PetscBool       is_sorted;
789   PetscErrorCode  ierr;
790 
791   PetscFunctionBegin;
792   ierr = ISSorted(is_I,&is_sorted);CHKERRQ(ierr);
793   if (!is_sorted) {
794     SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
795   }
796   ierr = ISSorted(is_B,&is_sorted);CHKERRQ(ierr);
797   if (!is_sorted) {
798     SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
799   }
800 
801   /* reset any previous data */
802   ierr = PCBDDCSubSchursReset(sub_schurs);CHKERRQ(ierr);
803 
804   /* get index sets for faces and edges (already sorted by global ordering) */
805   ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);CHKERRQ(ierr);
806   n_all_cc = n_faces+n_edges;
807   ierr = PetscBTCreate(n_all_cc,&sub_schurs->is_edge);CHKERRQ(ierr);
808   ierr = PetscBTCreate(n_all_cc,&sub_schurs->computed_Stilda_subs);CHKERRQ(ierr);
809   ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr);
810   for (i=0;i<n_faces;i++) {
811     all_cc[i] = faces[i];
812   }
813   for (i=0;i<n_edges;i++) {
814     all_cc[n_faces+i] = edges[i];
815     ierr = PetscBTSet(sub_schurs->is_edge,n_faces+i);CHKERRQ(ierr);
816   }
817   ierr = PetscFree(faces);CHKERRQ(ierr);
818   ierr = PetscFree(edges);CHKERRQ(ierr);
819 
820   /* Determine if MUMPS cane be used */
821   sub_schurs->use_mumps = PETSC_FALSE;
822 #if defined(PETSC_HAVE_MUMPS)
823   sub_schurs->use_mumps = (PetscBool)(!!A);
824 #endif
825 
826   /* update info in sub_schurs */
827   if (A) {
828     PetscBool isseqaij;
829 
830     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
831     if (isseqaij) {
832       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
833       sub_schurs->A = A;
834     } else { /* SeqBAIJ matrices does not support symmetry checking, SeqSBAIJ does not support MatPermute */
835       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);CHKERRQ(ierr);
836     }
837   }
838   ierr = PetscObjectReference((PetscObject)S);CHKERRQ(ierr);
839   sub_schurs->S = S;
840   ierr = PetscObjectReference((PetscObject)is_I);CHKERRQ(ierr);
841   sub_schurs->is_I = is_I;
842   ierr = PetscObjectReference((PetscObject)is_B);CHKERRQ(ierr);
843   sub_schurs->is_B = is_B;
844   ierr = PetscObjectReference((PetscObject)graph->l2gmap);CHKERRQ(ierr);
845   sub_schurs->l2gmap = graph->l2gmap;
846   ierr = PetscObjectReference((PetscObject)BtoNmap);CHKERRQ(ierr);
847   sub_schurs->BtoNmap = BtoNmap;
848   sub_schurs->n_subs = n_all_cc;
849   sub_schurs->is_subs = all_cc;
850   if (!sub_schurs->use_mumps) { /* for adaptive selection */
851     for (i=0;i<sub_schurs->n_subs;i++) {
852       ierr = ISSort(sub_schurs->is_subs[i]);CHKERRQ(ierr);
853     }
854   }
855   sub_schurs->is_Ej_com = vertices;
856 
857 
858   /* allocate space for schur complements */
859   sub_schurs->S_Ej_all = NULL;
860   sub_schurs->sum_S_Ej_all = NULL;
861   sub_schurs->sum_S_Ej_inv_all = NULL;
862   sub_schurs->sum_S_Ej_tilda_all = NULL;
863   sub_schurs->is_Ej_all = NULL;
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "PCBDDCSubSchursCreate"
869 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
870 {
871   PCBDDCSubSchurs schurs_ctx;
872   PetscErrorCode  ierr;
873 
874   PetscFunctionBegin;
875   ierr = PetscNew(&schurs_ctx);CHKERRQ(ierr);
876   schurs_ctx->n_subs = 0;
877   *sub_schurs = schurs_ctx;
878   PetscFunctionReturn(0);
879 }
880 
881 #undef __FUNCT__
882 #define __FUNCT__ "PCBDDCSubSchursDestroy"
883 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
884 {
885   PetscErrorCode ierr;
886 
887   PetscFunctionBegin;
888   ierr = PCBDDCSubSchursReset(*sub_schurs);CHKERRQ(ierr);
889   ierr = PetscFree(*sub_schurs);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "PCBDDCSubSchursReset"
895 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
896 {
897   PetscInt       i;
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = MatDestroy(&sub_schurs->A);CHKERRQ(ierr);
902   ierr = MatDestroy(&sub_schurs->S);CHKERRQ(ierr);
903   ierr = ISDestroy(&sub_schurs->is_I);CHKERRQ(ierr);
904   ierr = ISDestroy(&sub_schurs->is_B);CHKERRQ(ierr);
905   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);CHKERRQ(ierr);
906   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);CHKERRQ(ierr);
907   ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
908   ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
909   ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
910   ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
911   ierr = ISDestroy(&sub_schurs->is_Ej_all);CHKERRQ(ierr);
912   ierr = ISDestroy(&sub_schurs->is_Ej_com);CHKERRQ(ierr);
913   ierr = PetscBTDestroy(&sub_schurs->is_edge);CHKERRQ(ierr);
914   ierr = PetscBTDestroy(&sub_schurs->computed_Stilda_subs);CHKERRQ(ierr);
915   for (i=0;i<sub_schurs->n_subs;i++) {
916     ierr = ISDestroy(&sub_schurs->is_subs[i]);CHKERRQ(ierr);
917   }
918   ierr = ISDestroy(&sub_schurs->is_I_layer);CHKERRQ(ierr);
919   if (sub_schurs->n_subs) {
920     ierr = PetscFree(sub_schurs->is_subs);CHKERRQ(ierr);
921   }
922   sub_schurs->n_subs = 0;
923   PetscFunctionReturn(0);
924 }
925 
926 #undef __FUNCT__
927 #define __FUNCT__ "PCBDDCAdjGetNextLayer_Private"
928 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
929 {
930   PetscInt       i,j,n;
931   PetscErrorCode ierr;
932 
933   PetscFunctionBegin;
934   n = 0;
935   for (i=-n_prev;i<0;i++) {
936     PetscInt start_dof = queue_tip[i];
937     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
938       PetscInt dof = adjncy[j];
939       if (!PetscBTLookup(touched,dof)) {
940         ierr = PetscBTSet(touched,dof);CHKERRQ(ierr);
941         queue_tip[n] = dof;
942         n++;
943       }
944     }
945   }
946   *n_added = n;
947   PetscFunctionReturn(0);
948 }
949