xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision 683d3df6f9c0b211a07d9c40891fbe6b48cc9b85)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
6 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
7 static PetscErrorCode PCBDDCMumpsInteriorSolve(PC,Vec,Vec);
8 static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC,Vec,Vec);
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "PCBDDCMumpsReuseSolve_Private"
12 static PetscErrorCode PCBDDCMumpsReuseSolve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
13 {
14   PCBDDCReuseMumps ctx;
15   PetscScalar      *array,*array_mumps;
16 #if defined(PETSC_HAVE_MUMPS)
17   PetscInt         ival;
18 #endif
19   PetscInt         n;
20   PetscBool        copy = PETSC_FALSE;
21   PetscErrorCode   ierr;
22 
23   PetscFunctionBegin;
24   ierr = PCShellGetContext(pc,(void **)&ctx);CHKERRQ(ierr);
25 #if defined(PETSC_HAVE_MUMPS)
26   ierr = MatMumpsGetIcntl(ctx->F,26,&ival);CHKERRQ(ierr);
27   if (full) {
28     ierr = MatMumpsSetIcntl(ctx->F,26,-1);CHKERRQ(ierr);
29     copy = ctx->has_vertices;
30   } else {
31     ierr = MatMumpsSetIcntl(ctx->F,26,0);CHKERRQ(ierr);
32     copy = PETSC_TRUE;
33   }
34 #endif
35   ierr = VecGetLocalSize(rhs,&n);CHKERRQ(ierr);
36   /* copy rhs into factored matrix workspace */
37   if (copy) {
38     ierr = VecGetArrayRead(rhs,(const PetscScalar**)&array);CHKERRQ(ierr);
39     ierr = VecGetArray(ctx->rhs,&array_mumps);CHKERRQ(ierr);
40     ierr = PetscMemcpy(array_mumps,array,n*sizeof(PetscScalar));CHKERRQ(ierr);
41     ierr = VecRestoreArray(ctx->rhs,&array_mumps);CHKERRQ(ierr);
42     ierr = VecRestoreArrayRead(rhs,(const PetscScalar**)&array);CHKERRQ(ierr);
43 
44     if (transpose) {
45       ierr = MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);CHKERRQ(ierr);
46     } else {
47       ierr = MatSolve(ctx->F,ctx->rhs,ctx->sol);CHKERRQ(ierr);
48     }
49 
50     /* get back data to caller worskpace */
51     ierr = VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);CHKERRQ(ierr);
52     ierr = VecGetArray(sol,&array);CHKERRQ(ierr);
53     ierr = PetscMemcpy(array,array_mumps,n*sizeof(PetscScalar));CHKERRQ(ierr);
54     ierr = VecRestoreArray(sol,&array);CHKERRQ(ierr);
55     ierr = VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_mumps);CHKERRQ(ierr);
56   } else {
57     if (transpose) {
58       ierr = MatSolveTranspose(ctx->F,rhs,sol);CHKERRQ(ierr);
59     } else {
60       ierr = MatSolve(ctx->F,rhs,sol);CHKERRQ(ierr);
61     }
62   }
63 
64 #if defined(PETSC_HAVE_MUMPS)
65   ierr = MatMumpsSetIcntl(ctx->F,26,ival);CHKERRQ(ierr);
66 #endif
67   PetscFunctionReturn(0);
68 }
69 
70 #undef __FUNCT__
71 #define __FUNCT__ "PCBDDCMumpsCorrectionSolve"
72 static PetscErrorCode PCBDDCMumpsCorrectionSolve(PC pc, Vec rhs, Vec sol)
73 {
74   PetscErrorCode   ierr;
75 
76   PetscFunctionBegin;
77   ierr = PCBDDCMumpsReuseSolve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCMumpsCorrectionSolveTranspose"
83 static PetscErrorCode PCBDDCMumpsCorrectionSolveTranspose(PC pc, Vec rhs, Vec sol)
84 {
85   PetscErrorCode   ierr;
86 
87   PetscFunctionBegin;
88   ierr = PCBDDCMumpsReuseSolve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 #undef __FUNCT__
93 #define __FUNCT__ "PCBDDCMumpsInteriorSolve"
94 static PetscErrorCode PCBDDCMumpsInteriorSolve(PC pc, Vec rhs, Vec sol)
95 {
96   PetscErrorCode   ierr;
97 
98   PetscFunctionBegin;
99   ierr = PCBDDCMumpsReuseSolve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "PCBDDCMumpsInteriorSolveTranspose"
105 static PetscErrorCode PCBDDCMumpsInteriorSolveTranspose(PC pc, Vec rhs, Vec sol)
106 {
107   PetscErrorCode   ierr;
108 
109   PetscFunctionBegin;
110   ierr = PCBDDCMumpsReuseSolve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "PCBDDCReuseMumpsReset"
116 static PetscErrorCode PCBDDCReuseMumpsReset(PCBDDCReuseMumps reuse)
117 {
118   PetscErrorCode ierr;
119 
120   PetscFunctionBegin;
121   ierr = MatDestroy(&reuse->F);CHKERRQ(ierr);
122   ierr = VecDestroy(&reuse->sol);CHKERRQ(ierr);
123   ierr = VecDestroy(&reuse->rhs);CHKERRQ(ierr);
124   ierr = PCDestroy(&reuse->interior_solver);CHKERRQ(ierr);
125   ierr = PCDestroy(&reuse->correction_solver);CHKERRQ(ierr);
126   ierr = ISDestroy(&reuse->is_R);CHKERRQ(ierr);
127   ierr = ISDestroy(&reuse->is_B);CHKERRQ(ierr);
128   ierr = VecScatterDestroy(&reuse->correction_scatter_B);CHKERRQ(ierr);
129   ierr = VecDestroy(&reuse->sol_B);CHKERRQ(ierr);
130   ierr = VecDestroy(&reuse->rhs_B);CHKERRQ(ierr);
131   PetscFunctionReturn(0);
132 }
133 
134 #undef __FUNCT__
135 #define __FUNCT__ "PCBDDCComputeExplicitSchur"
136 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
137 {
138   Mat            B, C, D, Bd, Cd, AinvBd;
139   KSP            ksp;
140   PC             pc;
141   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
142   PetscReal      fill = 2.0;
143   PetscInt       n_I;
144   PetscMPIInt    size;
145   PetscErrorCode ierr;
146 
147   PetscFunctionBegin;
148   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);CHKERRQ(ierr);
149   if (size != 1) {
150     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
151   }
152   if (reuse == MAT_REUSE_MATRIX) {
153     PetscBool Sdense;
154 
155     ierr = PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);CHKERRQ(ierr);
156     if (!Sdense) {
157       SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should be dense");
158     }
159   }
160   ierr = MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);CHKERRQ(ierr);
161   ierr = MatSchurComplementGetKSP(M, &ksp);CHKERRQ(ierr);
162   ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr);
163   ierr = PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);CHKERRQ(ierr);
164   ierr = PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);CHKERRQ(ierr);
165   ierr = PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);CHKERRQ(ierr);
166   ierr = PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);CHKERRQ(ierr);
167   ierr = PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);CHKERRQ(ierr);
168   ierr = MatGetSize(B,&n_I,NULL);CHKERRQ(ierr);
169   if (n_I) {
170     if (!Bdense) {
171       ierr = MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);CHKERRQ(ierr);
172     } else {
173       Bd = B;
174     }
175 
176     if (isLU || isILU || isCHOL) {
177       Mat fact;
178       ierr = KSPSetUp(ksp);CHKERRQ(ierr);
179       ierr = PCFactorGetMatrix(pc, &fact);CHKERRQ(ierr);
180       ierr = MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);CHKERRQ(ierr);
181       ierr = MatMatSolve(fact, Bd, AinvBd);CHKERRQ(ierr);
182     } else {
183       PetscBool ex = PETSC_TRUE;
184 
185       if (ex) {
186         Mat Ainvd;
187 
188         ierr = PCComputeExplicitOperator(pc, &Ainvd);CHKERRQ(ierr);
189         ierr = MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);CHKERRQ(ierr);
190         ierr = MatDestroy(&Ainvd);CHKERRQ(ierr);
191       } else {
192         Vec         sol,rhs;
193         PetscScalar *arrayrhs,*arraysol;
194         PetscInt    i,nrhs,n;
195 
196         ierr = MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);CHKERRQ(ierr);
197         ierr = MatGetSize(Bd,&n,&nrhs);CHKERRQ(ierr);
198         ierr = MatDenseGetArray(Bd,&arrayrhs);CHKERRQ(ierr);
199         ierr = MatDenseGetArray(AinvBd,&arraysol);CHKERRQ(ierr);
200         ierr = KSPGetSolution(ksp,&sol);CHKERRQ(ierr);
201         ierr = KSPGetRhs(ksp,&rhs);CHKERRQ(ierr);
202         for (i=0;i<nrhs;i++) {
203           ierr = VecPlaceArray(rhs,arrayrhs+i*n);CHKERRQ(ierr);
204           ierr = VecPlaceArray(sol,arraysol+i*n);CHKERRQ(ierr);
205           ierr = KSPSolve(ksp,rhs,sol);CHKERRQ(ierr);
206           ierr = VecResetArray(rhs);CHKERRQ(ierr);
207           ierr = VecResetArray(sol);CHKERRQ(ierr);
208         }
209         ierr = MatDenseRestoreArray(Bd,&arrayrhs);CHKERRQ(ierr);
210         ierr = MatDenseRestoreArray(AinvBd,&arrayrhs);CHKERRQ(ierr);
211       }
212     }
213     if (!Bdense & !issym) {
214       ierr = MatDestroy(&Bd);CHKERRQ(ierr);
215     }
216 
217     if (!issym) {
218       if (!Cdense) {
219         ierr = MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);CHKERRQ(ierr);
220       } else {
221         Cd = C;
222       }
223       ierr = MatMatMult(Cd, AinvBd, reuse, fill, S);CHKERRQ(ierr);
224       if (!Cdense) {
225         ierr = MatDestroy(&Cd);CHKERRQ(ierr);
226       }
227     } else {
228       ierr = MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);CHKERRQ(ierr);
229       if (!Bdense) {
230         ierr = MatDestroy(&Bd);CHKERRQ(ierr);
231       }
232     }
233     ierr = MatDestroy(&AinvBd);CHKERRQ(ierr);
234   }
235 
236   if (D) {
237     Mat       Dd;
238     PetscBool Ddense;
239 
240     ierr = PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);CHKERRQ(ierr);
241     if (!Ddense) {
242       ierr = MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);CHKERRQ(ierr);
243     } else {
244       Dd = D;
245     }
246     if (n_I) {
247       ierr = MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
248     } else {
249       if (reuse == MAT_INITIAL_MATRIX) {
250         ierr = MatDuplicate(Dd,MAT_COPY_VALUES,S);CHKERRQ(ierr);
251       } else {
252         ierr = MatCopy(Dd,*S,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
253       }
254     }
255     if (!Ddense) {
256       ierr = MatDestroy(&Dd);CHKERRQ(ierr);
257     }
258   } else {
259     ierr = MatScale(*S,-1.0);CHKERRQ(ierr);
260   }
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "PCBDDCSubSchursSetUp"
266 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, PetscBool faster_deluxe, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick)
267 {
268   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
269   Mat                    S_all;
270   Mat                    global_schur_subsets,work_mat,*submats;
271   ISLocalToGlobalMapping l2gmap_subsets;
272   IS                     is_I,is_I_layer;
273   IS                     all_subsets,all_subsets_mult,all_subsets_n;
274   PetscInt               *nnz,*all_local_idx_N;
275   PetscInt               *auxnum1,*auxnum2;
276   PetscInt               i,subset_size,max_subset_size;
277   PetscInt               n_B,extra,local_size,global_size;
278   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
279   PetscScalar            *Bwork;
280   PetscSubcomm           subcomm;
281   PetscMPIInt            color,rank;
282   MPI_Comm               comm_n;
283   PetscBool              use_getr = PETSC_FALSE;
284   PetscErrorCode         ierr;
285 
286   PetscFunctionBegin;
287   /* update info in sub_schurs */
288   ierr = MatDestroy(&sub_schurs->A);CHKERRQ(ierr);
289   ierr = MatDestroy(&sub_schurs->S);CHKERRQ(ierr);
290   sub_schurs->is_hermitian = PETSC_FALSE;
291   sub_schurs->is_posdef = PETSC_FALSE;
292   if (Ain) {
293     PetscBool isseqaij;
294     /* determine if we are dealing with hermitian positive definite problems */
295 #if !defined(PETSC_USE_COMPLEX)
296     if (Ain->symmetric_set) {
297       sub_schurs->is_hermitian = Ain->symmetric;
298     }
299 #else
300     if (Ain->hermitian_set) {
301       sub_schurs->is_hermitian = Ain->hermitian;
302     }
303 #endif
304     if (Ain->spd_set) {
305       sub_schurs->is_posdef = Ain->spd;
306     }
307 
308     /* check */
309     ierr = PetscObjectTypeCompare((PetscObject)Ain,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
310     if (compute_Stilda && (!Ain->hermitian_set || !Ain->spd_set)) { /* these are lazy checks, maybe I should throw an error if they are not set */
311       PetscInt lsize;
312 
313       ierr = MatGetSize(Ain,&lsize,NULL);CHKERRQ(ierr);
314       if (lsize) {
315         PetscScalar val;
316         PetscReal   norm;
317         Vec         vec1,vec2,vec3;
318 
319         ierr = MatCreateVecs(Ain,&vec1,&vec2);CHKERRQ(ierr);
320         ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
321         ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
322         ierr = MatMult(Ain,vec1,vec2);CHKERRQ(ierr);
323 #if !defined(PETSC_USE_COMPLEX)
324         ierr = MatMultTranspose(Ain,vec1,vec3);CHKERRQ(ierr);
325 #else
326         ierr = MatMultHermitianTranspose(Ain,vec1,vec3);CHKERRQ(ierr);
327 #endif
328         ierr = VecAXPY(vec3,-1.0,vec2);CHKERRQ(ierr);
329         ierr = VecNorm(vec3,NORM_INFINITY,&norm);CHKERRQ(ierr);
330         if (!Ain->hermitian_set) {
331           if (norm > PetscSqrtReal(PETSC_SMALL)) {
332             sub_schurs->is_hermitian = PETSC_FALSE;
333           } else {
334             sub_schurs->is_hermitian = PETSC_TRUE;
335           }
336         }
337         if (!Ain->spd_set && !benign_trick) {
338           ierr = VecDot(vec1,vec2,&val);CHKERRQ(ierr);
339           if (PetscRealPart(val) > 0. && PetscAbsReal(PetscImaginaryPart(val)) < PETSC_SMALL) sub_schurs->is_posdef = PETSC_TRUE;
340         }
341         ierr = VecDestroy(&vec1);CHKERRQ(ierr);
342         ierr = VecDestroy(&vec2);CHKERRQ(ierr);
343         ierr = VecDestroy(&vec3);CHKERRQ(ierr);
344       } else {
345         sub_schurs->is_hermitian = PETSC_TRUE;
346         sub_schurs->is_posdef = PETSC_TRUE;
347       }
348     }
349     if (isseqaij) {
350       ierr = PetscObjectReference((PetscObject)Ain);CHKERRQ(ierr);
351       sub_schurs->A = Ain;
352     } else {
353       ierr = MatConvert(Ain,MATSEQAIJ,MAT_INITIAL_MATRIX,&sub_schurs->A);CHKERRQ(ierr);
354     }
355   }
356 
357   ierr = PetscObjectReference((PetscObject)Sin);CHKERRQ(ierr);
358   sub_schurs->S = Sin;
359   if (sub_schurs->use_mumps) {
360     sub_schurs->use_mumps = (PetscBool)(!!sub_schurs->A);
361   }
362 
363   /* preliminary checks */
364   if (!sub_schurs->use_mumps && compute_Stilda) {
365     SETERRQ(PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
366   }
367 
368   /* restrict work on active processes */
369   color = 0;
370   if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
371   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);CHKERRQ(ierr);
372   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);CHKERRQ(ierr);
373   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
374   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
375   ierr = PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);CHKERRQ(ierr);
376   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
377   if (!sub_schurs->n_subs) {
378     ierr = PetscCommDestroy(&comm_n);CHKERRQ(ierr);
379     PetscFunctionReturn(0);
380   }
381   /* ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL);CHKERRQ(ierr); */
382 
383   /* get Schur complement matrices */
384   if (!sub_schurs->use_mumps) {
385     Mat       tA_IB,tA_BI,tA_BB;
386     PetscBool isseqsbaij;
387     ierr = MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);CHKERRQ(ierr);
388     ierr = PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);CHKERRQ(ierr);
389     if (isseqsbaij) {
390       ierr = MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
391       ierr = MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
392       ierr = MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
393     } else {
394       ierr = PetscObjectReference((PetscObject)tA_BB);CHKERRQ(ierr);
395       A_BB = tA_BB;
396       ierr = PetscObjectReference((PetscObject)tA_IB);CHKERRQ(ierr);
397       A_IB = tA_IB;
398       ierr = PetscObjectReference((PetscObject)tA_BI);CHKERRQ(ierr);
399       A_BI = tA_BI;
400     }
401   } else {
402     A_II = NULL;
403     A_IB = NULL;
404     A_BI = NULL;
405     A_BB = NULL;
406   }
407   S_all = NULL;
408 
409   /* determine interior problems */
410   ierr = ISGetLocalSize(sub_schurs->is_I,&i);CHKERRQ(ierr);
411   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
412     PetscBT                touched;
413     const PetscInt*        idx_B;
414     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
415 
416     if (xadj == NULL || adjncy == NULL) {
417       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
418     }
419     /* get sizes */
420     ierr = ISGetLocalSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
421     ierr = ISGetLocalSize(sub_schurs->is_B,&n_B);CHKERRQ(ierr);
422 
423     ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr);
424     ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr);
425     ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr);
426 
427     /* all boundary dofs must be skipped when adding layers */
428     ierr = ISGetIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
429     for (j=0;j<n_B;j++) {
430       ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr);
431     }
432     ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr);
433     ierr = ISRestoreIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
434 
435     /* add prescribed number of layers of dofs */
436     n_local_dofs = n_B;
437     n_prev_added = n_B;
438     for (layer=0;layer<nlayers;layer++) {
439       PetscInt n_added;
440       if (n_local_dofs == n_I+n_B) break;
441       if (n_local_dofs > n_I+n_B) {
442         SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B);
443       }
444       ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr);
445       n_prev_added = n_added;
446       n_local_dofs += n_added;
447       if (!n_added) break;
448     }
449     ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
450 
451     /* IS for I layer dofs in original numbering */
452     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);CHKERRQ(ierr);
453     ierr = PetscFree(local_numbering);CHKERRQ(ierr);
454     ierr = ISSort(is_I_layer);CHKERRQ(ierr);
455     /* IS for I layer dofs in I numbering */
456     if (!sub_schurs->use_mumps) {
457       ISLocalToGlobalMapping ItoNmap;
458       ierr = ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);CHKERRQ(ierr);
459       ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);CHKERRQ(ierr);
460       ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr);
461 
462       /* II block */
463       ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr);
464     }
465   } else {
466     PetscInt n_I;
467 
468     /* IS for I dofs in original numbering */
469     ierr = PetscObjectReference((PetscObject)sub_schurs->is_I);CHKERRQ(ierr);
470     is_I_layer = sub_schurs->is_I;
471 
472     /* IS for I dofs in I numbering (strided 1) */
473     if (!sub_schurs->use_mumps) {
474       ierr = ISGetSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
475       ierr = ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);CHKERRQ(ierr);
476 
477       /* II block is the same */
478       ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr);
479       AE_II = A_II;
480     }
481   }
482 
483   /* Get info on subset sizes and sum of all subsets sizes */
484   max_subset_size = 0;
485   local_size = 0;
486   for (i=0;i<sub_schurs->n_subs;i++) {
487     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
488     max_subset_size = PetscMax(subset_size,max_subset_size);
489     local_size += subset_size;
490   }
491 
492   /* Work arrays for local indices */
493   extra = 0;
494   ierr = ISGetLocalSize(sub_schurs->is_B,&n_B);CHKERRQ(ierr);
495   if (sub_schurs->use_mumps && is_I_layer) {
496     ierr = ISGetLocalSize(is_I_layer,&extra);CHKERRQ(ierr);
497   }
498   ierr = PetscMalloc1(n_B+extra,&all_local_idx_N);CHKERRQ(ierr);
499   if (extra) {
500     const PetscInt *idxs;
501     ierr = ISGetIndices(is_I_layer,&idxs);CHKERRQ(ierr);
502     ierr = PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));CHKERRQ(ierr);
503     ierr = ISRestoreIndices(is_I_layer,&idxs);CHKERRQ(ierr);
504   }
505   ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr);
506   ierr = PetscMalloc1(sub_schurs->n_subs,&auxnum1);CHKERRQ(ierr);
507   ierr = PetscMalloc1(sub_schurs->n_subs,&auxnum2);CHKERRQ(ierr);
508 
509   /* Get local indices in local numbering */
510   local_size = 0;
511   for (i=0;i<sub_schurs->n_subs;i++) {
512     PetscInt j;
513     const    PetscInt *idxs;
514 
515     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
516     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
517     /* start (smallest in global ordering) and multiplicity */
518     auxnum1[i] = idxs[0];
519     auxnum2[i] = subset_size;
520     /* subset indices in local numbering */
521     ierr = PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
522     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
523     for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
524     local_size += subset_size;
525   }
526 
527   /* allocate extra workspace needed only for GETRI */
528   Bwork = NULL;
529   pivots = NULL;
530   if (local_size && !benign_trick && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
531     PetscScalar lwork;
532 
533     use_getr = PETSC_TRUE;
534     B_lwork = -1;
535     ierr = PetscBLASIntCast(local_size,&B_N);CHKERRQ(ierr);
536     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
537     PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,Bwork,&B_N,pivots,&lwork,&B_lwork,&B_ierr));
538     ierr = PetscFPTrapPop();CHKERRQ(ierr);
539     if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
540     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
541     ierr = PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);CHKERRQ(ierr);
542   }
543 
544   /* prepare parallel matrices for summing up properly schurs on subsets */
545   ierr = ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);CHKERRQ(ierr);
546   ierr = ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);CHKERRQ(ierr);
547   ierr = ISDestroy(&all_subsets_n);CHKERRQ(ierr);
548   ierr = ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);CHKERRQ(ierr);
549   ierr = PCBDDCSubsetNumbering(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);CHKERRQ(ierr);
550   ierr = ISDestroy(&all_subsets);CHKERRQ(ierr);
551   ierr = ISDestroy(&all_subsets_mult);CHKERRQ(ierr);
552   ierr = ISGetLocalSize(all_subsets_n,&i);CHKERRQ(ierr);
553   if (i != local_size) {
554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %d != %d",i,local_size);
555   }
556   ierr = ISLocalToGlobalMappingCreateIS(all_subsets_n,&l2gmap_subsets);CHKERRQ(ierr);
557   ierr = MatCreateIS(comm_n,1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,NULL,&work_mat);CHKERRQ(ierr);
558   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr);
559   ierr = MatCreate(PetscObjectComm((PetscObject)work_mat),&global_schur_subsets);CHKERRQ(ierr);
560   ierr = MatSetSizes(global_schur_subsets,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size);CHKERRQ(ierr);
561   ierr = MatSetType(global_schur_subsets,MATMPIAIJ);CHKERRQ(ierr);
562 
563   /* subset indices in local boundary numbering */
564   if (!sub_schurs->is_Ej_all) {
565     PetscInt *all_local_idx_B;
566 
567     ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr);
568     ierr = ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);CHKERRQ(ierr);
569     if (subset_size != local_size) {
570       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size);
571     }
572     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);CHKERRQ(ierr);
573   }
574 
575   /* Local matrix of all local Schur on subsets (transposed) */
576   if (!sub_schurs->S_Ej_all) {
577     ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr);
578     ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
579     ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr);
580     ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr);
581   }
582 
583   /* Compute Schur complements explicitly */
584   F = NULL;
585   if (!sub_schurs->use_mumps) { /* this code branch is used when MUMPS is not present; it is not very efficient, unless the economic version of the scaling is required */
586     Mat         S_Ej_expl;
587     PetscScalar *work;
588     PetscInt    j,*dummy_idx;
589     PetscBool   Sdense;
590 
591     ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);CHKERRQ(ierr);
592     local_size = 0;
593     for (i=0;i<sub_schurs->n_subs;i++) {
594       IS  is_subset_B;
595       Mat AE_EE,AE_IE,AE_EI,S_Ej;
596 
597       /* subsets in original and boundary numbering */
598       ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);CHKERRQ(ierr);
599       /* EE block */
600       ierr = MatGetSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);CHKERRQ(ierr);
601       /* IE block */
602       ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);CHKERRQ(ierr);
603       /* EI block */
604       if (sub_schurs->is_hermitian) {
605         ierr = MatCreateTranspose(AE_IE,&AE_EI);CHKERRQ(ierr);
606       } else {
607         ierr = MatGetSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);CHKERRQ(ierr);
608       }
609       ierr = ISDestroy(&is_subset_B);CHKERRQ(ierr);
610       ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);CHKERRQ(ierr);
611       ierr = MatDestroy(&AE_EE);CHKERRQ(ierr);
612       ierr = MatDestroy(&AE_IE);CHKERRQ(ierr);
613       ierr = MatDestroy(&AE_EI);CHKERRQ(ierr);
614       if (AE_II == A_II) { /* we can reuse the same ksp */
615         KSP ksp;
616         ierr = MatSchurComplementGetKSP(sub_schurs->S,&ksp);CHKERRQ(ierr);
617         ierr = MatSchurComplementSetKSP(S_Ej,ksp);CHKERRQ(ierr);
618       } else { /* build new ksp object which inherits ksp and pc types from the original one */
619         KSP       origksp,schurksp;
620         PC        origpc,schurpc;
621         KSPType   ksp_type;
622         PetscInt  n_internal;
623         PetscBool ispcnone;
624 
625         ierr = MatSchurComplementGetKSP(sub_schurs->S,&origksp);CHKERRQ(ierr);
626         ierr = MatSchurComplementGetKSP(S_Ej,&schurksp);CHKERRQ(ierr);
627         ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr);
628         ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr);
629         ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr);
630         ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr);
631         ierr = PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);CHKERRQ(ierr);
632         if (!ispcnone) {
633           PCType pc_type;
634           ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr);
635           ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr);
636         } else {
637           ierr = PCSetType(schurpc,PCLU);CHKERRQ(ierr);
638         }
639         ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr);
640         if (n_internal) { /* UMFPACK gives error with 0 sized problems */
641           MatSolverPackage solver=NULL;
642           ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr);
643           if (solver) {
644             ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr);
645           }
646         }
647         ierr = KSPSetUp(schurksp);CHKERRQ(ierr);
648       }
649       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
650       ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);CHKERRQ(ierr);
651       ierr = PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_hermitian,MAT_REUSE_MATRIX,&S_Ej_expl);CHKERRQ(ierr);
652       ierr = PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);CHKERRQ(ierr);
653       if (Sdense) {
654         for (j=0;j<subset_size;j++) {
655           dummy_idx[j]=local_size+j;
656         }
657         ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
658       } else {
659         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
660       }
661       ierr = MatDestroy(&S_Ej);CHKERRQ(ierr);
662       ierr = MatDestroy(&S_Ej_expl);CHKERRQ(ierr);
663       local_size += subset_size;
664     }
665     ierr = PetscFree2(dummy_idx,work);CHKERRQ(ierr);
666     /* free */
667     ierr = ISDestroy(&is_I);CHKERRQ(ierr);
668     ierr = MatDestroy(&AE_II);CHKERRQ(ierr);
669     ierr = PetscFree(all_local_idx_N);CHKERRQ(ierr);
670   } else {
671     Mat         A;
672     IS          is_A_all;
673     PetscScalar *work,*S_data,*schur_factor;
674     PetscInt    n,*idxs_schur,n_I,*dummy_idx,size_schur,size_active_schur,cum,cum2;
675     PetscBool   economic,mumps_S,S_lower_triangular = PETSC_FALSE,factor_workaround;
676 
677     /* get sizes */
678     n_I = 0;
679     if (is_I_layer) {
680       ierr = ISGetLocalSize(is_I_layer,&n_I);CHKERRQ(ierr);
681     }
682     economic = PETSC_FALSE;
683     ierr = ISGetLocalSize(sub_schurs->is_I,&cum);CHKERRQ(ierr);
684     if (cum != n_I) economic = PETSC_TRUE;
685     ierr = MatGetLocalSize(sub_schurs->A,&n,NULL);CHKERRQ(ierr);
686 
687     /* size active schurs does not count any dirichlet or vertex dof on the interface */
688     size_active_schur = local_size;
689     cum = n_I+size_active_schur;
690     if (sub_schurs->is_dir) {
691       const PetscInt* idxs;
692       PetscInt        n_dir;
693 
694       ierr = ISGetLocalSize(sub_schurs->is_dir,&n_dir);CHKERRQ(ierr);
695       ierr = ISGetIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
696       ierr = PetscMemcpy(all_local_idx_N+cum,idxs,n_dir*sizeof(PetscInt));CHKERRQ(ierr);
697       ierr = ISRestoreIndices(sub_schurs->is_dir,&idxs);CHKERRQ(ierr);
698       cum += n_dir;
699     }
700     factor_workaround = PETSC_FALSE;
701     /* include the primal vertices in the Schur complement */
702     if (exact_schur && sub_schurs->is_vertices && compute_Stilda) {
703       PetscInt n_v;
704 
705       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_v);CHKERRQ(ierr);
706       if (n_v) {
707         const PetscInt* idxs;
708 
709         ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
710         ierr = PetscMemcpy(all_local_idx_N+cum,idxs,n_v*sizeof(PetscInt));CHKERRQ(ierr);
711         ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
712         cum += n_v;
713         factor_workaround = PETSC_TRUE;
714       }
715     }
716     size_schur = cum - n_I;
717     ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all);CHKERRQ(ierr);
718     /* get working mat (TODO: factorize without actually permuting)  */
719     if (cum == n) {
720       ierr = ISSetPermutation(is_A_all);CHKERRQ(ierr);
721       ierr = MatPermute(sub_schurs->A,is_A_all,is_A_all,&A);CHKERRQ(ierr);
722     } else {
723       ierr = MatGetSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
724     }
725     ierr = MatSetOptionsPrefix(A,"sub_schurs_");CHKERRQ(ierr);
726     ierr = MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_hermitian);CHKERRQ(ierr);
727     ierr = MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);CHKERRQ(ierr);
728     ierr = MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);CHKERRQ(ierr);
729 
730     /* when using the benign subspace trick, the local Schur complements are SPD */
731     if (benign_trick) sub_schurs->is_posdef = PETSC_TRUE;
732 
733     if (n_I) { /* TODO add ordering from options */
734       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
735         ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr);
736       } else {
737         ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
738       }
739       /* subsets ordered last */
740       ierr = PetscMalloc1(size_schur,&idxs_schur);CHKERRQ(ierr);
741       for (i=0;i<size_schur;i++) {
742         idxs_schur[i] = n_I+i+1;
743       }
744 #if defined(PETSC_HAVE_MUMPS)
745       ierr = MatMumpsSetSchurIndices(F,size_schur,idxs_schur);CHKERRQ(ierr);
746 #endif
747       ierr = PetscFree(idxs_schur);CHKERRQ(ierr);
748 
749       /* factorization step */
750       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
751         ierr = MatCholeskyFactorSymbolic(F,A,NULL,NULL);CHKERRQ(ierr);
752 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
753         ierr = MatMumpsSetIcntl(F,19,2);CHKERRQ(ierr);
754         if (sub_schurs->is_posdef) {
755           ierr = MatMumpsSchurComplementSetSym(F,1);CHKERRQ(ierr);
756         } else {
757           ierr = MatMumpsSchurComplementSetSym(F,2);CHKERRQ(ierr);
758         }
759 #endif
760         ierr = MatCholeskyFactorNumeric(F,A,NULL);CHKERRQ(ierr);
761         S_lower_triangular = PETSC_TRUE;
762       } else {
763         ierr = MatLUFactorSymbolic(F,A,NULL,NULL,NULL);CHKERRQ(ierr);
764 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
765         ierr = MatMumpsSetIcntl(F,19,3);CHKERRQ(ierr);
766 #endif
767         ierr = MatLUFactorNumeric(F,A,NULL);CHKERRQ(ierr);
768       }
769 
770       /* get explicit Schur Complement computed during numeric factorization */
771 #if defined(PETSC_HAVE_MUMPS)
772       ierr = MatMumpsGetSchurComplement(F,&S_all);CHKERRQ(ierr);
773 #endif
774       /* we can reuse the solvers if we are not using the economic version */
775       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
776       factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
777       mumps_S = PETSC_TRUE;
778     } else { /* we can't use MUMPS when size_schur == size_of_the_problem */
779       ierr = MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);CHKERRQ(ierr);
780       reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
781       mumps_S = PETSC_FALSE;
782     }
783 
784     if (reuse_solvers) {
785       Mat              A_II;
786       Vec              vec1_B;
787       PCBDDCReuseMumps msolv_ctx;
788 
789       if (sub_schurs->reuse_mumps) {
790         ierr = PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);CHKERRQ(ierr);
791       } else {
792         ierr = PetscNew(&sub_schurs->reuse_mumps);CHKERRQ(ierr);
793       }
794       msolv_ctx = sub_schurs->reuse_mumps;
795       ierr = MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
796       ierr = PetscObjectReference((PetscObject)F);CHKERRQ(ierr);
797       msolv_ctx->F = F;
798       ierr = MatCreateVecs(F,&msolv_ctx->sol,NULL);CHKERRQ(ierr);
799       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
800       {
801         PetscScalar *array;
802         PetscInt    n;
803 
804         ierr = VecGetLocalSize(msolv_ctx->sol,&n);CHKERRQ(ierr);
805         ierr = VecGetArray(msolv_ctx->sol,&array);CHKERRQ(ierr);
806         ierr = VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs);CHKERRQ(ierr);
807         ierr = VecRestoreArray(msolv_ctx->sol,&array);CHKERRQ(ierr);
808       }
809       msolv_ctx->has_vertices = factor_workaround;
810 
811       /* interior solver */
812       ierr = PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver);CHKERRQ(ierr);
813       ierr = PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);CHKERRQ(ierr);
814       ierr = PCSetType(msolv_ctx->interior_solver,PCSHELL);CHKERRQ(ierr);
815       ierr = PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);CHKERRQ(ierr);
816       ierr = PCShellSetApply(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolve);CHKERRQ(ierr);
817       ierr = PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCMumpsInteriorSolveTranspose);CHKERRQ(ierr);
818 
819       /* correction solver */
820       ierr = PCCreate(PetscObjectComm((PetscObject)A),&msolv_ctx->correction_solver);CHKERRQ(ierr);
821       ierr = PCSetOperators(msolv_ctx->correction_solver,A,A);CHKERRQ(ierr);
822       ierr = PCSetType(msolv_ctx->correction_solver,PCSHELL);CHKERRQ(ierr);
823       ierr = PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);CHKERRQ(ierr);
824       ierr = PCShellSetApply(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolve);CHKERRQ(ierr);
825       ierr = PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCMumpsCorrectionSolveTranspose);CHKERRQ(ierr);
826 
827       /* scatter and vecs for Schur complement solver */
828       ierr = MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);CHKERRQ(ierr);
829       ierr = MatCreateVecs(sub_schurs->S,&vec1_B,NULL);CHKERRQ(ierr);
830       if (!factor_workaround) {
831         ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);CHKERRQ(ierr);
832         ierr = VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);CHKERRQ(ierr);
833         ierr = PetscObjectReference((PetscObject)is_A_all);CHKERRQ(ierr);
834         msolv_ctx->is_R = is_A_all;
835       } else {
836         IS              is_B_all;
837         const PetscInt* idxs;
838         PetscInt        dual,n_v,n;
839 
840         ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_v);CHKERRQ(ierr);
841         dual = size_schur - n_v;
842         ierr = ISGetLocalSize(is_A_all,&n);CHKERRQ(ierr);
843         ierr = ISGetIndices(is_A_all,&idxs);CHKERRQ(ierr);
844         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all);CHKERRQ(ierr);
845         ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B);CHKERRQ(ierr);
846         ierr = ISDestroy(&is_B_all);CHKERRQ(ierr);
847         ierr = ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all);CHKERRQ(ierr);
848         ierr = VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B);CHKERRQ(ierr);
849         ierr = ISDestroy(&is_B_all);CHKERRQ(ierr);
850         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R);CHKERRQ(ierr);
851         ierr = ISRestoreIndices(is_A_all,&idxs);CHKERRQ(ierr);
852       }
853       ierr = VecDestroy(&vec1_B);CHKERRQ(ierr);
854     }
855     ierr = MatDestroy(&A);CHKERRQ(ierr);
856     ierr = ISDestroy(&is_A_all);CHKERRQ(ierr);
857 
858     /* Work arrays */
859     ierr = PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);CHKERRQ(ierr);
860 
861     /* matrices for adaptive selection */
862     if (compute_Stilda) {
863       if (!sub_schurs->sum_S_Ej_tilda_all) {
864         ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
865         ierr = MatSetSizes(sub_schurs->sum_S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);CHKERRQ(ierr);
866         ierr = MatSetType(sub_schurs->sum_S_Ej_tilda_all,MATAIJ);CHKERRQ(ierr);
867         ierr = MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_tilda_all,0,nnz);CHKERRQ(ierr);
868       }
869       if (!sub_schurs->sum_S_Ej_inv_all) {
870         ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
871         ierr = MatSetSizes(sub_schurs->sum_S_Ej_inv_all,PETSC_DECIDE,PETSC_DECIDE,size_active_schur,size_active_schur);CHKERRQ(ierr);
872         ierr = MatSetType(sub_schurs->sum_S_Ej_inv_all,MATAIJ);CHKERRQ(ierr);
873         ierr = MatSeqAIJSetPreallocation(sub_schurs->sum_S_Ej_inv_all,0,nnz);CHKERRQ(ierr);
874       }
875     }
876 
877     /* S_Ej_all */
878     cum = cum2 = 0;
879     ierr = MatDenseGetArray(S_all,&S_data);CHKERRQ(ierr);
880     for (i=0;i<sub_schurs->n_subs;i++) {
881       PetscInt j;
882 
883       /* get S_E */
884       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
885       if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
886         PetscInt k;
887         for (k=0;k<subset_size;k++) {
888           for (j=k;j<subset_size;j++) {
889             work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
890             work[j*subset_size+k] = PetscConj(S_data[cum2+k*size_schur+j]);
891           }
892         }
893       } else { /* just copy to workspace */
894         PetscInt k;
895         for (k=0;k<subset_size;k++) {
896           for (j=0;j<subset_size;j++) {
897             work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
898           }
899         }
900       }
901       /* insert S_E values */
902       for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
903       ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
904 
905       /* if adaptivity is requested, invert S_E blocks */
906       if (compute_Stilda) {
907         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
908         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
909         if (!use_getr) { /* TODO add sytrf/i for symmetric non hermitian */
910           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,work,&B_N,&B_ierr));
911           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
912           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,work,&B_N,&B_ierr));
913           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
914         } else {
915           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,work,&B_N,pivots,&B_ierr));
916           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
917           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,work,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
918           if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
919         }
920         ierr = PetscFPTrapPop();CHKERRQ(ierr);
921         ierr = MatSetValues(sub_schurs->sum_S_Ej_inv_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
922       }
923       cum += subset_size;
924       cum2 += subset_size*(size_schur + 1);
925     }
926     ierr = MatDenseRestoreArray(S_all,&S_data);CHKERRQ(ierr);
927 
928 #if defined(PETSC_HAVE_MUMPS)
929     if (mumps_S) {
930       ierr = MatMumpsRestoreSchurComplement(F,&S_all);CHKERRQ(ierr);
931     }
932 #endif
933 
934     schur_factor = NULL;
935     if (compute_Stilda && size_active_schur) {
936 
937       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur) { /* we already computed the inverse */
938         PetscInt j;
939         for (j=0;j<size_schur;j++) dummy_idx[j] = j;
940         ierr = MatSetValues(sub_schurs->sum_S_Ej_tilda_all,size_schur,dummy_idx,size_schur,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
941       } else {
942         Mat S_all_inv=NULL;
943         if (mumps_S) { /* use MatMumps calls to invert S */
944             /* let LAPACK factorize the Schur complement inside MUMPS code */
945 #if defined(PETSC_HAVE_MUMPS)
946           ierr = MatMumpsFactorizeSchurComplement(F);CHKERRQ(ierr);
947 #endif
948           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
949              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
950           if (factor_workaround) {
951 #if defined(PETSC_HAVE_MUMPS)
952             PetscScalar *data;
953 #endif
954             PetscInt nd = 0;
955             if (!sub_schurs->is_posdef) {
956               SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
957             }
958 #if defined(PETSC_HAVE_MUMPS)
959             ierr = MatMumpsGetSchurComplement(F,&S_all_inv);CHKERRQ(ierr);
960 #endif
961             ierr = MatDenseGetArray(S_all_inv,&data);CHKERRQ(ierr);
962             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
963               ierr = ISGetLocalSize(sub_schurs->is_dir,&nd);CHKERRQ(ierr);
964             }
965             ierr = PetscMalloc1((size_active_schur*(size_active_schur+1))/2+nd,&schur_factor);CHKERRQ(ierr);
966             cum = 0;
967             for (i=0;i<size_active_schur;i++) {
968               ierr = PetscMemcpy(schur_factor+cum,data+i*(size_schur+1),(size_active_schur-i)*sizeof(PetscScalar));CHKERRQ(ierr);
969               cum += size_active_schur-i;
970             }
971             for (i=0;i<nd;i++) schur_factor[cum+i] = data[(i+size_active_schur)*(size_schur+1)];
972             /* invert without calling MatMumpsInvertSchurComplement, to avoid having schur->inverted set */
973             ierr = PetscBLASIntCast(size_schur,&B_N);CHKERRQ(ierr);
974             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
975             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,data,&B_N,&B_ierr));
976             ierr = PetscFPTrapPop();CHKERRQ(ierr);
977             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
978             ierr = MatDenseRestoreArray(S_all_inv,&data);CHKERRQ(ierr);
979           } else {
980 #if defined(PETSC_HAVE_MUMPS)
981             ierr = MatMumpsInvertSchurComplement(F);CHKERRQ(ierr);
982             ierr = MatMumpsGetSchurComplement(F,&S_all_inv);CHKERRQ(ierr);
983           }
984 #endif
985         } else { /* we need to invert explicitly since we are not using MUMPS for S */
986           ierr = PetscObjectReference((PetscObject)S_all);CHKERRQ(ierr);
987           S_all_inv = S_all;
988           ierr = MatDenseGetArray(S_all_inv,&S_data);CHKERRQ(ierr);
989           ierr = PetscBLASIntCast(size_schur,&B_N);CHKERRQ(ierr);
990           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
991           if (!use_getr) { /* TODO add sytrf/i for symmetric non hermitian */
992             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
993             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
994             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
995             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
996           } else {
997             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
998             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
999             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1000             if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1001           }
1002           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1003           ierr = MatDenseRestoreArray(S_all_inv,&S_data);CHKERRQ(ierr);
1004         }
1005         /* S_Ej_tilda_all */
1006         cum = cum2 = 0;
1007         ierr = MatDenseGetArray(S_all_inv,&S_data);CHKERRQ(ierr);
1008         for (i=0;i<sub_schurs->n_subs;i++) {
1009           PetscInt j;
1010 
1011           ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
1012           /* get (St^-1)_E */
1013           /* Here I don't need to expand to upper triangular since St^-1
1014              will be properly accessed later during adaptive selection */
1015           if (S_lower_triangular) {
1016             PetscInt k;
1017             for (k=0;k<subset_size;k++) {
1018               for (j=k;j<subset_size;j++) {
1019                 work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
1020               }
1021             }
1022           } else { /* copy to workspace */
1023             PetscInt k;
1024             for (k=0;k<subset_size;k++) {
1025               for (j=0;j<subset_size;j++) {
1026                 work[k*subset_size+j] = S_data[cum2+k*size_schur+j];
1027               }
1028             }
1029           }
1030           for (j=0;j<subset_size;j++) dummy_idx[j] = cum+j;
1031           ierr = MatSetValues(sub_schurs->sum_S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);CHKERRQ(ierr);
1032           cum += subset_size;
1033           cum2 += subset_size*(size_schur + 1);
1034         }
1035         ierr = MatDenseRestoreArray(S_all_inv,&S_data);CHKERRQ(ierr);
1036 #if defined(PETSC_HAVE_MUMPS)
1037         if (mumps_S) {
1038           ierr = MatMumpsRestoreSchurComplement(F,&S_all_inv);CHKERRQ(ierr);
1039         }
1040 #endif
1041         ierr = MatDestroy(&S_all_inv);CHKERRQ(ierr);
1042       }
1043 
1044       /* move back factors to Schur data into F */
1045       if (factor_workaround) {
1046         Mat S_tmp;
1047         PetscInt nv,nd=0;
1048 
1049         if (!mumps_S) {
1050           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
1051         }
1052         ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
1053 #if defined(PETSC_HAVE_MUMPS)
1054         ierr = MatMumpsGetSchurComplement(F,&S_tmp);CHKERRQ(ierr);
1055 #endif
1056         if (sub_schurs->is_posdef) {
1057           PetscScalar *data;
1058 
1059           ierr = MatZeroEntries(S_tmp);CHKERRQ(ierr);
1060           ierr = MatDenseGetArray(S_tmp,&data);CHKERRQ(ierr);
1061 
1062           if (S_lower_triangular) {
1063             cum = 0;
1064             for (i=0;i<size_active_schur;i++) {
1065               ierr = PetscMemcpy(data+i*(size_schur+1),schur_factor+cum,(size_active_schur-i)*sizeof(PetscScalar));CHKERRQ(ierr);
1066               cum += size_active_schur-i;
1067 	    }
1068           } else {
1069             ierr = PetscMemcpy(data,schur_factor,size_schur*size_schur*sizeof(PetscScalar));CHKERRQ(ierr);
1070           }
1071           if (sub_schurs->is_dir) {
1072             ierr = ISGetLocalSize(sub_schurs->is_dir,&nd);CHKERRQ(ierr);
1073 	    for (i=0;i<nd;i++) {
1074 	      data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1075 	    }
1076           }
1077           /* workaround: since I cannot modify the matrices used inside MUMPS for the Schur reduction and expansion phase,
1078              set the diagonal entry of the Schur factor to a very large value */
1079           for (i=size_active_schur+nd;i<size_schur;i++) {
1080             data[i*(size_schur+1)] = PETSC_MAX_REAL;
1081           }
1082           ierr = MatDenseRestoreArray(S_tmp,&data);CHKERRQ(ierr);
1083         } else {
1084           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1085         }
1086 #if defined(PETSC_HAVE_MUMPS)
1087         ierr = MatMumpsRestoreSchurComplement(F,&S_tmp);CHKERRQ(ierr);
1088 #endif
1089       }
1090     }
1091     ierr = PetscFree2(dummy_idx,work);CHKERRQ(ierr);
1092     ierr = PetscFree(schur_factor);CHKERRQ(ierr);
1093   }
1094   ierr = PetscFree(nnz);CHKERRQ(ierr);
1095   ierr = MatDestroy(&F);CHKERRQ(ierr);
1096   ierr = ISDestroy(&is_I_layer);CHKERRQ(ierr);
1097   ierr = MatDestroy(&S_all);CHKERRQ(ierr);
1098   ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1099   ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1100   ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1101   ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1102   ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1103   if (compute_Stilda) {
1104     ierr = MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1105     ierr = MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1106     ierr = MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1107     ierr = MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1108   }
1109 
1110   /* Global matrix of all assembled Schur on subsets */
1111   ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr);
1112   ierr = MatISSetMPIXAIJPreallocation_Private(work_mat,global_schur_subsets,PETSC_TRUE);CHKERRQ(ierr);
1113   ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
1114 
1115   /* Get local part of (\sum_j S_Ej) */
1116   if (!sub_schurs->sum_S_Ej_all) {
1117     ierr = MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr);
1118     sub_schurs->sum_S_Ej_all = submats[0];
1119   } else {
1120     ierr = PetscMalloc1(1,&submats);CHKERRQ(ierr);
1121     submats[0] = sub_schurs->sum_S_Ej_all;
1122     ierr = MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);CHKERRQ(ierr);
1123   }
1124 
1125   /* Compute explicitly (\sum_j S_Ej)^-1 (faster scaling during PCApply, needs extra work when doing setup) */
1126   if (faster_deluxe) {
1127     Mat         tmpmat;
1128     PetscScalar *array;
1129     PetscInt    cum;
1130 
1131     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&array);CHKERRQ(ierr);
1132     cum = 0;
1133     for (i=0;i<sub_schurs->n_subs;i++) {
1134       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
1135       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
1136       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1137       if (!use_getr) {
1138         PetscInt j,k;
1139 
1140         PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
1141         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1142         PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
1143         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1144         for (j=0;j<B_N;j++) {
1145           for (k=j+1;k<B_N;k++) {
1146             array[k*B_N+j+cum] = array[j*B_N+k+cum];
1147           }
1148         }
1149       } else {
1150         PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
1151         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1152         PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1153         if (B_ierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1154       }
1155       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1156       cum += subset_size*subset_size;
1157     }
1158     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&array);CHKERRQ(ierr);
1159     ierr = MatMatMult(sub_schurs->S_Ej_all,sub_schurs->sum_S_Ej_all,MAT_INITIAL_MATRIX,1.0,&tmpmat);CHKERRQ(ierr);
1160     ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
1161     ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
1162     sub_schurs->S_Ej_all = tmpmat;
1163   }
1164 
1165   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1166   if (compute_Stilda) {
1167     ierr = MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
1168     ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
1169     submats[0] = sub_schurs->sum_S_Ej_tilda_all;
1170     ierr = MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);CHKERRQ(ierr);
1171     ierr = MatISSetLocalMat(work_mat,sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
1172     ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
1173     submats[0] = sub_schurs->sum_S_Ej_inv_all;
1174     ierr = MatGetSubMatrices(global_schur_subsets,1,&all_subsets_n,&all_subsets_n,MAT_REUSE_MATRIX,&submats);CHKERRQ(ierr);
1175   }
1176 
1177   /* free workspace */
1178   ierr = PetscFree(submats);CHKERRQ(ierr);
1179   ierr = PetscFree2(Bwork,pivots);CHKERRQ(ierr);
1180   ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr);
1181   ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1182   ierr = ISDestroy(&all_subsets_n);CHKERRQ(ierr);
1183   ierr = PetscCommDestroy(&comm_n);CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "PCBDDCSubSchursInit"
1189 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap)
1190 {
1191   IS              *faces,*edges,*all_cc,vertices;
1192   PetscInt        i,n_faces,n_edges,n_all_cc;
1193   PetscBool       is_sorted;
1194   PetscErrorCode  ierr;
1195 
1196   PetscFunctionBegin;
1197   ierr = ISSorted(is_I,&is_sorted);CHKERRQ(ierr);
1198   if (!is_sorted) {
1199     SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1200   }
1201   ierr = ISSorted(is_B,&is_sorted);CHKERRQ(ierr);
1202   if (!is_sorted) {
1203     SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1204   }
1205 
1206   /* reset any previous data */
1207   ierr = PCBDDCSubSchursReset(sub_schurs);CHKERRQ(ierr);
1208 
1209   /* get index sets for faces and edges (already sorted by global ordering) */
1210   ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);CHKERRQ(ierr);
1211   n_all_cc = n_faces+n_edges;
1212   ierr = PetscBTCreate(n_all_cc,&sub_schurs->is_edge);CHKERRQ(ierr);
1213   ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr);
1214   for (i=0;i<n_faces;i++) {
1215     all_cc[i] = faces[i];
1216   }
1217   for (i=0;i<n_edges;i++) {
1218     all_cc[n_faces+i] = edges[i];
1219     ierr = PetscBTSet(sub_schurs->is_edge,n_faces+i);CHKERRQ(ierr);
1220   }
1221   ierr = PetscFree(faces);CHKERRQ(ierr);
1222   ierr = PetscFree(edges);CHKERRQ(ierr);
1223   sub_schurs->is_dir = NULL;
1224   ierr = PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);CHKERRQ(ierr);
1225 
1226   /* Determine if MUMPS can be used */
1227   sub_schurs->use_mumps = PETSC_FALSE;
1228 #if defined(PETSC_HAVE_MUMPS)
1229   sub_schurs->use_mumps = PETSC_TRUE;
1230 #endif
1231 
1232   ierr = PetscObjectReference((PetscObject)is_I);CHKERRQ(ierr);
1233   sub_schurs->is_I = is_I;
1234   ierr = PetscObjectReference((PetscObject)is_B);CHKERRQ(ierr);
1235   sub_schurs->is_B = is_B;
1236   ierr = PetscObjectReference((PetscObject)graph->l2gmap);CHKERRQ(ierr);
1237   sub_schurs->l2gmap = graph->l2gmap;
1238   ierr = PetscObjectReference((PetscObject)BtoNmap);CHKERRQ(ierr);
1239   sub_schurs->BtoNmap = BtoNmap;
1240   sub_schurs->n_subs = n_all_cc;
1241   sub_schurs->is_subs = all_cc;
1242   if (!sub_schurs->use_mumps) { /* sort by local ordering mumps is not present */
1243     for (i=0;i<sub_schurs->n_subs;i++) {
1244       ierr = ISSort(sub_schurs->is_subs[i]);CHKERRQ(ierr);
1245     }
1246   }
1247   sub_schurs->is_vertices = vertices;
1248   sub_schurs->S_Ej_all = NULL;
1249   sub_schurs->sum_S_Ej_all = NULL;
1250   sub_schurs->sum_S_Ej_inv_all = NULL;
1251   sub_schurs->sum_S_Ej_tilda_all = NULL;
1252   sub_schurs->is_Ej_all = NULL;
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "PCBDDCSubSchursCreate"
1258 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1259 {
1260   PCBDDCSubSchurs schurs_ctx;
1261   PetscErrorCode  ierr;
1262 
1263   PetscFunctionBegin;
1264   ierr = PetscNew(&schurs_ctx);CHKERRQ(ierr);
1265   schurs_ctx->n_subs = 0;
1266   *sub_schurs = schurs_ctx;
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "PCBDDCSubSchursDestroy"
1272 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
1273 {
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277   ierr = PCBDDCSubSchursReset(*sub_schurs);CHKERRQ(ierr);
1278   ierr = PetscFree(*sub_schurs);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "PCBDDCSubSchursReset"
1284 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1285 {
1286   PetscInt       i;
1287   PetscErrorCode ierr;
1288 
1289   PetscFunctionBegin;
1290   ierr = MatDestroy(&sub_schurs->A);CHKERRQ(ierr);
1291   ierr = MatDestroy(&sub_schurs->S);CHKERRQ(ierr);
1292   ierr = ISDestroy(&sub_schurs->is_I);CHKERRQ(ierr);
1293   ierr = ISDestroy(&sub_schurs->is_B);CHKERRQ(ierr);
1294   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);CHKERRQ(ierr);
1295   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);CHKERRQ(ierr);
1296   ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
1297   ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
1298   ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
1299   ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
1300   ierr = ISDestroy(&sub_schurs->is_Ej_all);CHKERRQ(ierr);
1301   ierr = ISDestroy(&sub_schurs->is_vertices);CHKERRQ(ierr);
1302   ierr = ISDestroy(&sub_schurs->is_dir);CHKERRQ(ierr);
1303   ierr = PetscBTDestroy(&sub_schurs->is_edge);CHKERRQ(ierr);
1304   for (i=0;i<sub_schurs->n_subs;i++) {
1305     ierr = ISDestroy(&sub_schurs->is_subs[i]);CHKERRQ(ierr);
1306   }
1307   if (sub_schurs->n_subs) {
1308     ierr = PetscFree(sub_schurs->is_subs);CHKERRQ(ierr);
1309   }
1310   if (sub_schurs->reuse_mumps) {
1311     ierr = PCBDDCReuseMumpsReset(sub_schurs->reuse_mumps);CHKERRQ(ierr);
1312   }
1313   ierr = PetscFree(sub_schurs->reuse_mumps);CHKERRQ(ierr);
1314   sub_schurs->n_subs = 0;
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "PCBDDCAdjGetNextLayer_Private"
1320 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
1321 {
1322   PetscInt       i,j,n;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   n = 0;
1327   for (i=-n_prev;i<0;i++) {
1328     PetscInt start_dof = queue_tip[i];
1329     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
1330       PetscInt dof = adjncy[j];
1331       if (!PetscBTLookup(touched,dof)) {
1332         ierr = PetscBTSet(touched,dof);CHKERRQ(ierr);
1333         queue_tip[n] = dof;
1334         n++;
1335       }
1336     }
1337   }
1338   *n_added = n;
1339   PetscFunctionReturn(0);
1340 }
1341