xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision 5a95e1ceb00aaf913daa567a70a604cb4fd63728)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
6 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCComputeExplicitSchur"
10 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
11 {
12   Mat            B, C, D, Bd, Cd, AinvBd;
13   KSP            ksp;
14   PC             pc;
15   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
16   PetscReal      fill = 2.0;
17   PetscInt       n_I;
18   PetscMPIInt    size;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);CHKERRQ(ierr);
23   if (size != 1) {
24     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
25   }
26   if (reuse == MAT_REUSE_MATRIX) {
27     PetscBool Sdense;
28 
29     ierr = PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);CHKERRQ(ierr);
30     if (!Sdense) {
31       SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
32     }
33   }
34   ierr = MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);CHKERRQ(ierr);
35   ierr = MatSchurComplementGetKSP(M, &ksp);CHKERRQ(ierr);
36   ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr);
37   ierr = PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);CHKERRQ(ierr);
38   ierr = PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);CHKERRQ(ierr);
39   ierr = PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);CHKERRQ(ierr);
40   ierr = PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);CHKERRQ(ierr);
41   ierr = PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);CHKERRQ(ierr);
42   ierr = MatGetSize(B,&n_I,NULL);CHKERRQ(ierr);
43   if (n_I) {
44     if (!Bdense) {
45       ierr = MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);CHKERRQ(ierr);
46     } else {
47       Bd = B;
48     }
49 
50     if (isLU || isILU || isCHOL) {
51       Mat fact;
52       ierr = KSPSetUp(ksp);CHKERRQ(ierr);
53       ierr = PCFactorGetMatrix(pc, &fact);CHKERRQ(ierr);
54       ierr = MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);CHKERRQ(ierr);
55       ierr = MatMatSolve(fact, Bd, AinvBd);CHKERRQ(ierr);
56     } else {
57       PetscBool ex = PETSC_TRUE;
58 
59       if (ex) {
60         Mat Ainvd;
61 
62         ierr = PCComputeExplicitOperator(pc, &Ainvd);CHKERRQ(ierr);
63         ierr = MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);CHKERRQ(ierr);
64         ierr = MatDestroy(&Ainvd);CHKERRQ(ierr);
65       } else {
66         Vec         sol,rhs;
67         PetscScalar *arrayrhs,*arraysol;
68         PetscInt    i,nrhs,n;
69 
70         ierr = MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);CHKERRQ(ierr);
71         ierr = MatGetSize(Bd,&n,&nrhs);CHKERRQ(ierr);
72         ierr = MatDenseGetArray(Bd,&arrayrhs);CHKERRQ(ierr);
73         ierr = MatDenseGetArray(AinvBd,&arraysol);CHKERRQ(ierr);
74         ierr = KSPGetSolution(ksp,&sol);CHKERRQ(ierr);
75         ierr = KSPGetRhs(ksp,&rhs);CHKERRQ(ierr);
76         for (i=0;i<nrhs;i++) {
77           ierr = VecPlaceArray(rhs,arrayrhs+i*n);CHKERRQ(ierr);
78           ierr = VecPlaceArray(sol,arraysol+i*n);CHKERRQ(ierr);
79           ierr = KSPSolve(ksp,rhs,sol);CHKERRQ(ierr);
80           ierr = VecResetArray(rhs);CHKERRQ(ierr);
81           ierr = VecResetArray(sol);CHKERRQ(ierr);
82         }
83         ierr = MatDenseRestoreArray(Bd,&arrayrhs);CHKERRQ(ierr);
84         ierr = MatDenseRestoreArray(AinvBd,&arrayrhs);CHKERRQ(ierr);
85       }
86     }
87     if (!Bdense & !issym) {
88       ierr = MatDestroy(&Bd);CHKERRQ(ierr);
89     }
90 
91     if (!issym) {
92       if (!Cdense) {
93         ierr = MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);CHKERRQ(ierr);
94       } else {
95         Cd = C;
96       }
97       ierr = MatMatMult(Cd, AinvBd, reuse, fill, S);CHKERRQ(ierr);
98       if (!Cdense) {
99         ierr = MatDestroy(&Cd);CHKERRQ(ierr);
100       }
101     } else {
102       ierr = MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);CHKERRQ(ierr);
103       if (!Bdense) {
104         ierr = MatDestroy(&Bd);CHKERRQ(ierr);
105       }
106     }
107     ierr = MatDestroy(&AinvBd);CHKERRQ(ierr);
108   }
109 
110   if (D) {
111     Mat       Dd;
112     PetscBool Ddense;
113 
114     ierr = PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);CHKERRQ(ierr);
115     if (!Ddense) {
116       ierr = MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);CHKERRQ(ierr);
117     } else {
118       Dd = D;
119     }
120     if (n_I) {
121       ierr = MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
122     } else {
123       if (reuse == MAT_INITIAL_MATRIX) {
124         ierr = MatDuplicate(Dd,MAT_COPY_VALUES,S);CHKERRQ(ierr);
125       } else {
126         ierr = MatCopy(Dd,*S,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
127       }
128     }
129     if (!Ddense) {
130       ierr = MatDestroy(&Dd);CHKERRQ(ierr);
131     }
132   } else {
133     ierr = MatScale(*S,-1.0);CHKERRQ(ierr);
134   }
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "PCBDDCSubSchursSetUp"
140 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool compute_Stilda, PetscBool invert_Stildas, PetscBool use_edges, PetscBool use_faces)
141 {
142   Mat                    A_II,A_IB,A_BI,A_BB,AE_II;
143   Mat                    S_all,S_all_inv;
144   Mat                    global_schur_subsets,work_mat;
145   Mat                    S_Ej_tilda_all,S_Ej_inv_all;
146   ISLocalToGlobalMapping l2gmap_subsets;
147   IS                     is_I,temp_is;
148   PetscInt               *nnz,*all_local_idx_G,*all_local_idx_N;
149   PetscInt               i,subset_size,max_subset_size;
150   PetscInt               extra,local_size,global_size;
151   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
152   PetscScalar            *Bwork;
153   PetscSubcomm           subcomm;
154   PetscMPIInt            color,rank;
155   MPI_Comm               comm_n;
156   PetscErrorCode         ierr;
157 
158   PetscFunctionBegin;
159   /* preliminary checks */
160   if (!sub_schurs->use_mumps && compute_Stilda) {
161     SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
162   }
163   /* determine if we are dealing with hermitian positive definite problems */
164   sub_schurs->is_hermitian = PETSC_FALSE;
165   sub_schurs->is_posdef = PETSC_FALSE;
166   if (sub_schurs->A) {
167     PetscInt lsize;
168 
169     ierr = MatGetSize(sub_schurs->A,&lsize,NULL);CHKERRQ(ierr);
170     if (lsize) {
171       ierr = MatIsHermitian(sub_schurs->A,0.0,&sub_schurs->is_hermitian);CHKERRQ(ierr);
172       if (sub_schurs->is_hermitian) {
173         PetscScalar val;
174         Vec         vec1,vec2;
175 
176         ierr = MatCreateVecs(sub_schurs->A,&vec1,&vec2);CHKERRQ(ierr);
177         ierr = VecSetRandom(vec1,NULL);
178         ierr = VecCopy(vec1,vec2);CHKERRQ(ierr);
179         ierr = MatMult(sub_schurs->A,vec2,vec1);CHKERRQ(ierr);
180         ierr = VecDot(vec1,vec2,&val);CHKERRQ(ierr);
181         if (PetscRealPart(val) > 0. && PetscImaginaryPart(val) == 0.) sub_schurs->is_posdef = PETSC_TRUE;
182         ierr = VecDestroy(&vec1);CHKERRQ(ierr);
183         ierr = VecDestroy(&vec2);CHKERRQ(ierr);
184       }
185     } else {
186       sub_schurs->is_hermitian = PETSC_TRUE;
187       sub_schurs->is_posdef = PETSC_TRUE;
188     }
189     if (compute_Stilda && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
190       SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"General matrix pencils are not currently supported");
191     }
192   }
193   /* restrict work on active processes */
194   color = 0;
195   if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
196   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);CHKERRQ(ierr);
197   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);CHKERRQ(ierr);
198   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
199   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
200   ierr = PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);CHKERRQ(ierr);
201   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
202   if (!sub_schurs->n_subs) {
203     ierr = PetscCommDestroy(&comm_n);CHKERRQ(ierr);
204     PetscFunctionReturn(0);
205   }
206 
207   /* get Schur complement matrices */
208   if (!sub_schurs->use_mumps) {
209     PetscBool isseqaij;
210     ierr = MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr);
211     ierr = PetscObjectTypeCompare((PetscObject)A_BB,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
212     if (!isseqaij) {
213       ierr = MatConvert(A_BB,MATSEQAIJ,MAT_REUSE_MATRIX,&A_BB);CHKERRQ(ierr);
214       ierr = MatConvert(A_IB,MATSEQAIJ,MAT_REUSE_MATRIX,&A_IB);CHKERRQ(ierr);
215       ierr = MatConvert(A_BI,MATSEQAIJ,MAT_REUSE_MATRIX,&A_BI);CHKERRQ(ierr);
216     }
217   } else {
218     A_II = NULL;
219     A_IB = NULL;
220     A_BI = NULL;
221     A_BB = NULL;
222   }
223   S_all = NULL;
224   S_all_inv = NULL;
225   S_Ej_tilda_all = NULL;
226   S_Ej_inv_all = NULL;
227 
228   /* determine interior problems */
229   ierr = ISDestroy(&sub_schurs->is_I_layer);CHKERRQ(ierr);
230   ierr = ISGetLocalSize(sub_schurs->is_I,&i);CHKERRQ(ierr);
231   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
232     PetscBT                touched;
233     const PetscInt*        idx_B;
234     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
235 
236     if (xadj == NULL || adjncy == NULL) {
237       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
238     }
239     /* get sizes */
240     ierr = ISGetLocalSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
241     ierr = ISGetLocalSize(sub_schurs->is_B,&n_B);CHKERRQ(ierr);
242 
243     ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr);
244     ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr);
245     ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr);
246 
247     /* all boundary dofs must be skipped when adding layers */
248     ierr = ISGetIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
249     for (j=0;j<n_B;j++) {
250       ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr);
251     }
252     ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr);
253     ierr = ISRestoreIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
254 
255     /* add prescribed number of layers of dofs */
256     n_local_dofs = n_B;
257     n_prev_added = n_B;
258     for (layer=0;layer<nlayers;layer++) {
259       PetscInt n_added;
260       if (n_local_dofs == n_I+n_B) break;
261       if (n_local_dofs > n_I+n_B) {
262         SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B);
263       }
264       ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr);
265       n_prev_added = n_added;
266       n_local_dofs += n_added;
267       if (!n_added) break;
268     }
269     ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
270 
271     /* IS for I layer dofs in original numbering */
272     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_I_layer);CHKERRQ(ierr);
273     ierr = PetscFree(local_numbering);CHKERRQ(ierr);
274     ierr = ISSort(sub_schurs->is_I_layer);CHKERRQ(ierr);
275     /* IS for I layer dofs in I numbering */
276     if (!sub_schurs->use_mumps) {
277       ISLocalToGlobalMapping ItoNmap;
278       ierr = ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);CHKERRQ(ierr);
279       ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_I_layer,&is_I);CHKERRQ(ierr);
280       ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr);
281 
282       /* II block */
283       ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr);
284     }
285   } else {
286     PetscInt n_I;
287 
288     /* IS for I dofs in original numbering */
289     ierr = PetscObjectReference((PetscObject)sub_schurs->is_I);CHKERRQ(ierr);
290     sub_schurs->is_I_layer = sub_schurs->is_I;
291 
292     /* IS for I dofs in I numbering (strided 1) */
293     if (!sub_schurs->use_mumps) {
294       ierr = ISGetSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
295       ierr = ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);CHKERRQ(ierr);
296 
297       /* II block is the same */
298       ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr);
299       AE_II = A_II;
300     }
301   }
302 
303   /* Get info on subset sizes and sum of all subsets sizes */
304   max_subset_size = 0;
305   local_size = 0;
306   for (i=0;i<sub_schurs->n_subs;i++) {
307     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
308     max_subset_size = PetscMax(subset_size,max_subset_size);
309     local_size += subset_size;
310   }
311 
312   /* Work arrays for local indices */
313   extra = 0;
314   if (sub_schurs->use_mumps) {
315     ierr = ISGetLocalSize(sub_schurs->is_I_layer,&extra);CHKERRQ(ierr);
316   }
317   ierr = PetscMalloc1(local_size+extra,&all_local_idx_N);CHKERRQ(ierr);
318   if (extra) {
319     const PetscInt *idxs;
320     ierr = ISGetIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr);
321     ierr = PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));CHKERRQ(ierr);
322     ierr = ISRestoreIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr);
323   }
324   ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr);
325 
326   /* Get local indices in local numbering */
327   local_size = 0;
328   for (i=0;i<sub_schurs->n_subs;i++) {
329     PetscInt j;
330     const    PetscInt *idxs;
331 
332     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
333     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
334     /* subset indices in local numbering */
335     ierr = PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
336     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
337     for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
338     local_size += subset_size;
339   }
340 
341   /* allocate extra workspace needed only for GETRI */
342   Bwork = NULL;
343   pivots = NULL;
344   if (sub_schurs->n_subs && !sub_schurs->is_hermitian) {
345     PetscScalar lwork;
346 
347     B_lwork = -1;
348     ierr = PetscBLASIntCast(local_size,&B_N);CHKERRQ(ierr);
349     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
350     PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,Bwork,&B_N,pivots,&lwork,&B_lwork,&B_ierr));
351     ierr = PetscFPTrapPop();CHKERRQ(ierr);
352     if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
353     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
354     ierr = PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);CHKERRQ(ierr);
355   }
356 
357   /* prepare parallel matrices for summing up properly schurs on subsets */
358   ierr = PCBDDCSubsetNumbering(comm_n,sub_schurs->l2gmap,local_size,all_local_idx_N+extra,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr);
359   ierr = ISLocalToGlobalMappingCreate(comm_n,1,local_size,all_local_idx_G,PETSC_COPY_VALUES,&l2gmap_subsets);CHKERRQ(ierr);
360   ierr = MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);CHKERRQ(ierr);
361   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr);
362   ierr = MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);CHKERRQ(ierr);
363   ierr = MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);CHKERRQ(ierr);
364   ierr = MatSetType(global_schur_subsets,MATMPIAIJ);CHKERRQ(ierr);
365 
366   /* subset indices in local boundary numbering */
367   if (!sub_schurs->is_Ej_all) {
368     PetscInt *all_local_idx_B;
369 
370     ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr);
371     ierr = ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);CHKERRQ(ierr);
372     if (subset_size != local_size) {
373       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size);
374     }
375     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);CHKERRQ(ierr);
376   }
377 
378   /* Local matrix of all local Schur on subsets (transposed) */
379   if (!sub_schurs->S_Ej_all) {
380     ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr);
381     ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
382     ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr);
383     ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr);
384   } else {
385     ierr = MatZeroEntries(sub_schurs->S_Ej_all);CHKERRQ(ierr);
386   }
387 
388   /* Compute Schur complements explicitly */
389   ierr = PetscBTMemzero(sub_schurs->n_subs,sub_schurs->computed_Stilda_subs);CHKERRQ(ierr);
390   if (!sub_schurs->use_mumps) {
391     Mat         S_Ej_expl;
392     PetscScalar *work;
393     PetscInt    j,*dummy_idx;
394     PetscBool   Sdense;
395 
396     ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);CHKERRQ(ierr);
397     local_size = 0;
398     for (i=0;i<sub_schurs->n_subs;i++) {
399       IS  is_subset_B;
400       Mat AE_EE,AE_IE,AE_EI,S_Ej;
401 
402       /* subsets in original and boundary numbering */
403       ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);CHKERRQ(ierr);
404       /* EE block */
405       ierr = MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);CHKERRQ(ierr);
406       /* IE block */
407       ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);CHKERRQ(ierr);
408       /* EI block */
409       if (sub_schurs->is_hermitian) {
410         ierr = MatCreateTranspose(AE_IE,&AE_EI);CHKERRQ(ierr);
411       } else {
412         ierr = MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);CHKERRQ(ierr);
413       }
414       ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);CHKERRQ(ierr);
415       ierr = MatDestroy(&AE_EE);CHKERRQ(ierr);
416       ierr = MatDestroy(&AE_IE);CHKERRQ(ierr);
417       ierr = MatDestroy(&AE_EI);CHKERRQ(ierr);
418       if (AE_II == A_II) { /* we can reuse the same ksp */
419         KSP ksp;
420         ierr = MatSchurComplementGetKSP(sub_schurs->S,&ksp);CHKERRQ(ierr);
421         ierr = MatSchurComplementSetKSP(S_Ej,ksp);CHKERRQ(ierr);
422       } else { /* build new ksp object which inherits ksp and pc types from the original one */
423         KSP       origksp,schurksp;
424         PC        origpc,schurpc;
425         KSPType   ksp_type;
426         PetscInt  n_internal;
427         PetscBool ispcnone;
428 
429         ierr = MatSchurComplementGetKSP(sub_schurs->S,&origksp);CHKERRQ(ierr);
430         ierr = MatSchurComplementGetKSP(S_Ej,&schurksp);CHKERRQ(ierr);
431         ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr);
432         ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr);
433         ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr);
434         ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr);
435         ierr = PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);CHKERRQ(ierr);
436         if (!ispcnone) {
437           PCType pc_type;
438           ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr);
439           ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr);
440         } else {
441           ierr = PCSetType(schurpc,PCLU);CHKERRQ(ierr);
442         }
443         ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr);
444         if (n_internal) { /* UMFPACK gives error with 0 sized problems */
445           MatSolverPackage solver=NULL;
446           ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr);
447           if (solver) {
448             ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr);
449           }
450         }
451         ierr = KSPSetUp(schurksp);CHKERRQ(ierr);
452       }
453       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
454       ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);CHKERRQ(ierr);
455       ierr = PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);CHKERRQ(ierr);
456       ierr = PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);CHKERRQ(ierr);
457       if (Sdense) {
458         for (j=0;j<subset_size;j++) {
459           dummy_idx[j]=local_size+j;
460         }
461         ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
462       } else {
463         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
464       }
465       ierr = MatDestroy(&S_Ej);CHKERRQ(ierr);
466       local_size += subset_size;
467     }
468     ierr = PetscFree2(dummy_idx,work);CHKERRQ(ierr);
469     /* free */
470     ierr = ISDestroy(&is_I);CHKERRQ(ierr);
471     ierr = MatDestroy(&AE_II);CHKERRQ(ierr);
472     ierr = PetscFree(all_local_idx_N);CHKERRQ(ierr);
473   } else {
474     Mat         A,F;
475     IS          is_A_all;
476     PetscScalar *work;
477     PetscInt    *idxs_schur,n_I,*dummy_idx;
478 
479     /* get working mat */
480     ierr = ISGetLocalSize(sub_schurs->is_I_layer,&n_I);CHKERRQ(ierr);
481     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);CHKERRQ(ierr);
482     ierr = MatGetSubMatrixUnsorted(sub_schurs->A,is_A_all,is_A_all,&A);CHKERRQ(ierr);
483     ierr = ISDestroy(&is_A_all);CHKERRQ(ierr);
484 
485     if (n_I) {
486       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
487         ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr);
488       } else {
489         ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
490       }
491 
492       /* subsets ordered last */
493       ierr = PetscMalloc1(local_size,&idxs_schur);CHKERRQ(ierr);
494       for (i=0;i<local_size;i++) {
495         idxs_schur[i] = n_I+i+1;
496       }
497 #if defined(PETSC_HAVE_MUMPS)
498       ierr = MatMumpsSetSchurIndices(F,local_size,idxs_schur);CHKERRQ(ierr);
499 #endif
500       ierr = PetscFree(idxs_schur);CHKERRQ(ierr);
501 
502       /* factorization step */
503       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
504         ierr = MatCholeskyFactorSymbolic(F,A,NULL,NULL);CHKERRQ(ierr);
505         ierr = MatCholeskyFactorNumeric(F,A,NULL);CHKERRQ(ierr);
506       } else {
507         ierr = MatLUFactorSymbolic(F,A,NULL,NULL,NULL);CHKERRQ(ierr);
508         ierr = MatLUFactorNumeric(F,A,NULL);CHKERRQ(ierr);
509       }
510 
511       /* get explicit Schur Complement computed during numeric factorization */
512 #if defined(PETSC_HAVE_MUMPS)
513       ierr = MatMumpsGetSchurComplement(F,&S_all);CHKERRQ(ierr);
514 #endif
515       ierr = MatDestroy(&F);CHKERRQ(ierr);
516     } else {
517       ierr = MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);CHKERRQ(ierr);
518     }
519     ierr = MatDestroy(&A);CHKERRQ(ierr);
520     ierr = PetscFree(all_local_idx_N);CHKERRQ(ierr);
521 
522     if (compute_Stilda) { /* TODO PICKUP BETTER NAMES */
523       ierr = MatCreate(PETSC_COMM_SELF,&S_Ej_tilda_all);CHKERRQ(ierr);
524       ierr = MatSetSizes(S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
525       ierr = MatSetType(S_Ej_tilda_all,MATAIJ);CHKERRQ(ierr);
526       ierr = MatSeqAIJSetPreallocation(S_Ej_tilda_all,0,nnz);CHKERRQ(ierr);
527       ierr = MatCreate(PETSC_COMM_SELF,&S_Ej_inv_all);CHKERRQ(ierr);
528       ierr = MatSetSizes(S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
529       ierr = MatSetType(S_Ej_inv_all,MATAIJ);CHKERRQ(ierr);
530       ierr = MatSeqAIJSetPreallocation(S_Ej_inv_all,0,nnz);CHKERRQ(ierr);
531     }
532 
533     /* Work arrays */
534     ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);CHKERRQ(ierr);
535 
536     /* Get St^-1 */
537     if (compute_Stilda) {
538       PetscScalar *vals;
539       ierr = PetscBLASIntCast(local_size,&B_N);CHKERRQ(ierr);
540       ierr = MatDuplicate(S_all,MAT_COPY_VALUES,&S_all_inv);CHKERRQ(ierr);
541       ierr = MatDenseGetArray(S_all_inv,&vals);CHKERRQ(ierr);
542       if (!sub_schurs->is_hermitian) {
543         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,vals,&B_N,pivots,&B_ierr));
544         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
545         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,vals,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
546         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
547       } else {
548         PetscInt j,k;
549 
550         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,vals,&B_N,&B_ierr));
551         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
552         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,vals,&B_N,&B_ierr));
553         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
554         for (j=0;j<B_N;j++) {
555           for (k=j+1;k<B_N;k++) {
556             vals[k*B_N+j] = vals[j*B_N+k];
557           }
558         }
559       }
560       ierr = MatDenseRestoreArray(S_all_inv,&vals);CHKERRQ(ierr);
561     }
562 
563     local_size = 0;
564     for (i=0;i<sub_schurs->n_subs;i++) {
565       Mat S_Ej;
566       IS  is_E;
567       PetscInt j;
568 
569       /* get S_E */
570       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
571       ierr = ISCreateStride(PETSC_COMM_SELF,subset_size,local_size,1,&is_E);CHKERRQ(ierr);
572       ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej);CHKERRQ(ierr);
573       ierr = MatGetSubMatrix(S_all,is_E,is_E,MAT_REUSE_MATRIX,&S_Ej);CHKERRQ(ierr);
574 
575       /* insert S_E values */
576       for (j=0;j<subset_size;j++) {
577         dummy_idx[j]=local_size+j;
578       }
579       ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
580 
581       /* if adaptivity is requested, invert S_E and insert St_E^-1 blocks */
582       if (compute_Stilda && ((PetscBTLookup(sub_schurs->is_edge,i) && use_edges) || (!PetscBTLookup(sub_schurs->is_edge,i) && use_faces))) {
583         /* get S_E^-1 */
584         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
585         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
586         if (!sub_schurs->is_hermitian) {
587           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
588           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
589           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
590           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
591         } else {
592           PetscInt j,k;
593 
594           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
595           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
596           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
597           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
598           for (j=0;j<B_N;j++) {
599             for (k=j+1;k<B_N;k++) {
600               work[k*B_N+j] = work[j*B_N+k];
601             }
602           }
603         }
604         ierr = PetscFPTrapPop();CHKERRQ(ierr);
605         ierr = MatSetValues(S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
606 
607         /* get St_E^-1 */
608         ierr = PetscBTSet(sub_schurs->computed_Stilda_subs,i);CHKERRQ(ierr);
609         ierr = MatGetSubMatrix(S_all_inv,is_E,is_E,MAT_REUSE_MATRIX,&S_Ej);CHKERRQ(ierr);
610         ierr = MatSetValues(S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
611       }
612       ierr = MatDestroy(&S_Ej);CHKERRQ(ierr);
613       ierr = ISDestroy(&is_E);CHKERRQ(ierr);
614       local_size += subset_size;
615     }
616     ierr = PetscFree2(dummy_idx,work);CHKERRQ(ierr);
617   }
618   ierr = PetscFree(nnz);CHKERRQ(ierr);
619   ierr = MatDestroy(&S_all);CHKERRQ(ierr);
620   ierr = MatDestroy(&S_all_inv);CHKERRQ(ierr);
621   ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
622   ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
623   if (compute_Stilda) {
624     ierr = MatAssemblyBegin(S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
625     ierr = MatAssemblyEnd(S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
626     ierr = MatAssemblyBegin(S_Ej_inv_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
627     ierr = MatAssemblyEnd(S_Ej_inv_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
628   }
629 
630   /* Global matrix of all assembled Schur on subsets */
631   ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr);
632   ierr = MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);CHKERRQ(ierr);
633   ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
634 
635   /* Get local part of (\sum_j S_Ej) */
636   ierr = ISCreateGeneral(comm_n,local_size,all_local_idx_G,PETSC_OWN_POINTER,&temp_is);CHKERRQ(ierr);
637   ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
638   ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
639 
640   /* Compute explicitly (\sum_j S_Ej)^-1 */
641   {
642     Mat         tmpmat;
643     PetscScalar *array;
644     PetscInt    cum;
645 
646     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);CHKERRQ(ierr);
647     cum = 0;
648     for (i=0;i<sub_schurs->n_subs;i++) {
649       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
650       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
651       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
652       if (!sub_schurs->is_hermitian) {
653         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
654         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
655         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
656         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
657       } else {
658         PetscInt j,k;
659 
660         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
661         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
662         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
663         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
664         for (j=0;j<B_N;j++) {
665           for (k=j+1;k<B_N;k++) {
666             array[k*B_N+j+cum] = array[j*B_N+k+cum];
667           }
668         }
669       }
670       ierr = PetscFPTrapPop();CHKERRQ(ierr);
671       cum += subset_size*subset_size;
672     }
673     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);CHKERRQ(ierr);
674     ierr = MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);CHKERRQ(ierr);
675     ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
676     sub_schurs->S_Ej_all = tmpmat;
677   }
678 
679   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) and invert them */
680   if (compute_Stilda) {
681     PetscInt    cum;
682     PetscScalar *array,*array2;
683 
684     ierr = MatISSetLocalMat(work_mat,S_Ej_tilda_all);CHKERRQ(ierr);
685     ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
686     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
687     ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
688     ierr = MatISSetLocalMat(work_mat,S_Ej_inv_all);CHKERRQ(ierr);
689     ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
690     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
691     ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
692     /* invert blocks */
693     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&array);CHKERRQ(ierr);
694     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array2);CHKERRQ(ierr);
695     cum = 0;
696     for (i=0;i<sub_schurs->n_subs;i++) {
697       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
698       if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
699         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
700         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
701         if (!sub_schurs->is_hermitian) {
702           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
703           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
704           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
705           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
706         } else {
707           PetscInt j,k;
708 
709           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
710           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
711           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
712           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
713           for (j=0;j<B_N;j++) {
714             for (k=j+1;k<B_N;k++) {
715               array[k*B_N+j+cum] = array[j*B_N+k+cum];
716             }
717           }
718         }
719         if (invert_Stildas) { /* Stildas can be singular */
720           if (!sub_schurs->is_hermitian) {
721             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array2+cum,&B_N,pivots,&B_ierr));
722             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
723             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array2+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
724             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
725           } else {
726             PetscInt j,k;
727 
728             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array2+cum,&B_N,&B_ierr));
729             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
730             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array2+cum,&B_N,&B_ierr));
731             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
732             for (j=0;j<B_N;j++) {
733               for (k=j+1;k<B_N;k++) {
734                 array2[k*B_N+j+cum] = array2[j*B_N+k+cum];
735               }
736             }
737           }
738         }
739         ierr = PetscFPTrapPop();CHKERRQ(ierr);
740       }
741       cum += subset_size*subset_size;
742     }
743     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&array);CHKERRQ(ierr);
744     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array2);CHKERRQ(ierr);
745   }
746 
747   /* free workspace */
748   ierr = PetscFree2(Bwork,pivots);CHKERRQ(ierr);
749   ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr);
750   ierr = MatDestroy(&S_Ej_tilda_all);CHKERRQ(ierr);
751   ierr = MatDestroy(&S_Ej_inv_all);CHKERRQ(ierr);
752   ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
753   ierr = ISDestroy(&temp_is);CHKERRQ(ierr);
754   ierr = PetscCommDestroy(&comm_n);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "PCBDDCSubSchursInit"
760 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, Mat A, Mat S, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
761 {
762   IS              *faces,*edges,*all_cc,vertices;
763   PetscInt        i,n_faces,n_edges,n_all_cc;
764   PetscBool       is_sorted;
765   PetscErrorCode  ierr;
766 
767   PetscFunctionBegin;
768   ierr = ISSorted(is_I,&is_sorted);CHKERRQ(ierr);
769   if (!is_sorted) {
770     SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
771   }
772   ierr = ISSorted(is_B,&is_sorted);CHKERRQ(ierr);
773   if (!is_sorted) {
774     SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
775   }
776 
777   /* reset any previous data */
778   ierr = PCBDDCSubSchursReset(sub_schurs);CHKERRQ(ierr);
779 
780   /* get index sets for faces and edges (already sorted by global ordering) */
781   ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);CHKERRQ(ierr);
782   n_all_cc = n_faces+n_edges;
783   ierr = PetscBTCreate(n_all_cc,&sub_schurs->is_edge);CHKERRQ(ierr);
784   ierr = PetscBTCreate(n_all_cc,&sub_schurs->computed_Stilda_subs);CHKERRQ(ierr);
785   ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr);
786   for (i=0;i<n_faces;i++) {
787     all_cc[i] = faces[i];
788   }
789   for (i=0;i<n_edges;i++) {
790     all_cc[n_faces+i] = edges[i];
791     ierr = PetscBTSet(sub_schurs->is_edge,n_faces+i);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(faces);CHKERRQ(ierr);
794   ierr = PetscFree(edges);CHKERRQ(ierr);
795 
796   /* Determine if MUMPS cane be used */
797   sub_schurs->use_mumps = PETSC_FALSE;
798 #if defined(PETSC_HAVE_MUMPS)
799   sub_schurs->use_mumps = (PetscBool)(!!A);
800 #endif
801 
802   /* update info in sub_schurs */
803   if (A) {
804     PetscBool isseqaij;
805 
806     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
807     if (isseqaij) {
808       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
809       sub_schurs->A = A;
810     } else { /* SeqBAIJ matrices does not support symmetry checking, SeqSBAIJ does not support MatPermute */
811       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);CHKERRQ(ierr);
812     }
813   }
814   ierr = PetscObjectReference((PetscObject)S);CHKERRQ(ierr);
815   sub_schurs->S = S;
816   ierr = PetscObjectReference((PetscObject)is_I);CHKERRQ(ierr);
817   sub_schurs->is_I = is_I;
818   ierr = PetscObjectReference((PetscObject)is_B);CHKERRQ(ierr);
819   sub_schurs->is_B = is_B;
820   ierr = PetscObjectReference((PetscObject)graph->l2gmap);CHKERRQ(ierr);
821   sub_schurs->l2gmap = graph->l2gmap;
822   ierr = PetscObjectReference((PetscObject)BtoNmap);CHKERRQ(ierr);
823   sub_schurs->BtoNmap = BtoNmap;
824   sub_schurs->n_subs = n_all_cc;
825   sub_schurs->is_subs = all_cc;
826   if (!sub_schurs->use_mumps) { /* for adaptive selection */
827     for (i=0;i<sub_schurs->n_subs;i++) {
828       ierr = ISSort(sub_schurs->is_subs[i]);CHKERRQ(ierr);
829     }
830   }
831   sub_schurs->is_Ej_com = vertices;
832 
833 
834   /* allocate space for schur complements */
835   sub_schurs->S_Ej_all = NULL;
836   sub_schurs->sum_S_Ej_all = NULL;
837   sub_schurs->sum_S_Ej_inv_all = NULL;
838   sub_schurs->sum_S_Ej_tilda_all = NULL;
839   sub_schurs->is_Ej_all = NULL;
840   PetscFunctionReturn(0);
841 }
842 
843 #undef __FUNCT__
844 #define __FUNCT__ "PCBDDCSubSchursCreate"
845 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
846 {
847   PCBDDCSubSchurs schurs_ctx;
848   PetscErrorCode  ierr;
849 
850   PetscFunctionBegin;
851   ierr = PetscNew(&schurs_ctx);CHKERRQ(ierr);
852   schurs_ctx->n_subs = 0;
853   *sub_schurs = schurs_ctx;
854   PetscFunctionReturn(0);
855 }
856 
857 #undef __FUNCT__
858 #define __FUNCT__ "PCBDDCSubSchursDestroy"
859 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
860 {
861   PetscErrorCode ierr;
862 
863   PetscFunctionBegin;
864   ierr = PCBDDCSubSchursReset(*sub_schurs);CHKERRQ(ierr);
865   ierr = PetscFree(*sub_schurs);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "PCBDDCSubSchursReset"
871 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
872 {
873   PetscInt       i;
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   ierr = MatDestroy(&sub_schurs->A);CHKERRQ(ierr);
878   ierr = MatDestroy(&sub_schurs->S);CHKERRQ(ierr);
879   ierr = ISDestroy(&sub_schurs->is_I);CHKERRQ(ierr);
880   ierr = ISDestroy(&sub_schurs->is_B);CHKERRQ(ierr);
881   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);CHKERRQ(ierr);
882   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);CHKERRQ(ierr);
883   ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
884   ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
885   ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
886   ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
887   ierr = ISDestroy(&sub_schurs->is_Ej_all);CHKERRQ(ierr);
888   ierr = ISDestroy(&sub_schurs->is_Ej_com);CHKERRQ(ierr);
889   ierr = PetscBTDestroy(&sub_schurs->is_edge);CHKERRQ(ierr);
890   ierr = PetscBTDestroy(&sub_schurs->computed_Stilda_subs);CHKERRQ(ierr);
891   for (i=0;i<sub_schurs->n_subs;i++) {
892     ierr = ISDestroy(&sub_schurs->is_subs[i]);CHKERRQ(ierr);
893   }
894   ierr = ISDestroy(&sub_schurs->is_I_layer);CHKERRQ(ierr);
895   if (sub_schurs->n_subs) {
896     ierr = PetscFree(sub_schurs->is_subs);CHKERRQ(ierr);
897   }
898   sub_schurs->n_subs = 0;
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "PCBDDCAdjGetNextLayer_Private"
904 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
905 {
906   PetscInt       i,j,n;
907   PetscErrorCode ierr;
908 
909   PetscFunctionBegin;
910   n = 0;
911   for (i=-n_prev;i<0;i++) {
912     PetscInt start_dof = queue_tip[i];
913     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
914       PetscInt dof = adjncy[j];
915       if (!PetscBTLookup(touched,dof)) {
916         ierr = PetscBTSet(touched,dof);CHKERRQ(ierr);
917         queue_tip[n] = dof;
918         n++;
919       }
920     }
921   }
922   *n_added = n;
923   PetscFunctionReturn(0);
924 }
925