xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision a0c1843407dcb9509b3c2f65f771c3f8bf8d8806)
1 #include <petsc/private/pcbddcimpl.h>
2 #include <petsc/private/pcbddcprivateimpl.h>
3 #include <../src/mat/impls/dense/seq/dense.h>
4 #include <petscblaslapack.h>
5 
6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec);
9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec);
10 
11 /* if v2 is not present, correction is done in-place */
12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13 {
14   PetscScalar    *array;
15   PetscScalar    *array2;
16 
17   PetscFunctionBegin;
18   if (!ctx->benign_n) PetscFunctionReturn(0);
19   if (sol && full) {
20     PetscInt n_I,size_schur;
21 
22     /* get sizes */
23     PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
24     PetscCall(VecGetSize(v,&n_I));
25     n_I = n_I - size_schur;
26     /* get schur sol from array */
27     PetscCall(VecGetArray(v,&array));
28     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
29     PetscCall(VecRestoreArray(v,&array));
30     /* apply interior sol correction */
31     PetscCall(MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work));
32     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
33     PetscCall(MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v));
34   }
35   if (v2) {
36     PetscInt nl;
37 
38     PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array));
39     PetscCall(VecGetLocalSize(v2,&nl));
40     PetscCall(VecGetArray(v2,&array2));
41     PetscCall(PetscArraycpy(array2,array,nl));
42   } else {
43     PetscCall(VecGetArray(v,&array));
44     array2 = array;
45   }
46   if (!sol) { /* change rhs */
47     PetscInt n;
48     for (n=0;n<ctx->benign_n;n++) {
49       PetscScalar    sum = 0.;
50       const PetscInt *cols;
51       PetscInt       nz,i;
52 
53       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
54       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
55       for (i=0;i<nz-1;i++) sum += array[cols[i]];
56 #if defined(PETSC_USE_COMPLEX)
57       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
58 #else
59       sum = -sum/nz;
60 #endif
61       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
62       ctx->benign_save_vals[n] = array2[cols[nz-1]];
63       array2[cols[nz-1]] = sum;
64       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
65     }
66   } else {
67     PetscInt n;
68     for (n=0;n<ctx->benign_n;n++) {
69       PetscScalar    sum = 0.;
70       const PetscInt *cols;
71       PetscInt       nz,i;
72       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
73       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
74       for (i=0;i<nz-1;i++) sum += array[cols[i]];
75 #if defined(PETSC_USE_COMPLEX)
76       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
77 #else
78       sum = -sum/nz;
79 #endif
80       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
81       array2[cols[nz-1]] = ctx->benign_save_vals[n];
82       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
83     }
84   }
85   if (v2) {
86     PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array));
87     PetscCall(VecRestoreArray(v2,&array2));
88   } else {
89     PetscCall(VecRestoreArray(v,&array));
90   }
91   if (!sol && full) {
92     Vec      usedv;
93     PetscInt n_I,size_schur;
94 
95     /* get sizes */
96     PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
97     PetscCall(VecGetSize(v,&n_I));
98     n_I = n_I - size_schur;
99     /* compute schur rhs correction */
100     if (v2) {
101       usedv = v2;
102     } else {
103       usedv = v;
104     }
105     /* apply schur rhs correction */
106     PetscCall(MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work));
107     PetscCall(VecGetArrayRead(usedv,(const PetscScalar**)&array));
108     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
109     PetscCall(VecRestoreArrayRead(usedv,(const PetscScalar**)&array));
110     PetscCall(MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec));
111     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
112   }
113   PetscFunctionReturn(0);
114 }
115 
116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
117 {
118   PCBDDCReuseSolvers ctx;
119   PetscBool          copy = PETSC_FALSE;
120 
121   PetscFunctionBegin;
122   PetscCall(PCShellGetContext(pc,&ctx));
123   if (full) {
124 #if defined(PETSC_HAVE_MUMPS)
125     PetscCall(MatMumpsSetIcntl(ctx->F,26,-1));
126 #endif
127 #if defined(PETSC_HAVE_MKL_PARDISO)
128     PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0));
129 #endif
130     copy = ctx->has_vertices;
131   } else { /* interior solver */
132 #if defined(PETSC_HAVE_MUMPS)
133     PetscCall(MatMumpsSetIcntl(ctx->F,26,0));
134 #endif
135 #if defined(PETSC_HAVE_MKL_PARDISO)
136     PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,1));
137 #endif
138     copy = PETSC_TRUE;
139   }
140   /* copy rhs into factored matrix workspace */
141   if (copy) {
142     PetscInt    n;
143     PetscScalar *array,*array_solver;
144 
145     PetscCall(VecGetLocalSize(rhs,&n));
146     PetscCall(VecGetArrayRead(rhs,(const PetscScalar**)&array));
147     PetscCall(VecGetArray(ctx->rhs,&array_solver));
148     PetscCall(PetscArraycpy(array_solver,array,n));
149     PetscCall(VecRestoreArray(ctx->rhs,&array_solver));
150     PetscCall(VecRestoreArrayRead(rhs,(const PetscScalar**)&array));
151 
152     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full));
153     if (transpose) {
154       PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol));
155     } else {
156       PetscCall(MatSolve(ctx->F,ctx->rhs,ctx->sol));
157     }
158     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full));
159 
160     /* get back data to caller worskpace */
161     PetscCall(VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
162     PetscCall(VecGetArray(sol,&array));
163     PetscCall(PetscArraycpy(array,array_solver,n));
164     PetscCall(VecRestoreArray(sol,&array));
165     PetscCall(VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
166   } else {
167     if (ctx->benign_n) {
168       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full));
169       if (transpose) {
170         PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,sol));
171       } else {
172         PetscCall(MatSolve(ctx->F,ctx->rhs,sol));
173       }
174       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full));
175     } else {
176       if (transpose) {
177         PetscCall(MatSolveTranspose(ctx->F,rhs,sol));
178       } else {
179         PetscCall(MatSolve(ctx->F,rhs,sol));
180       }
181     }
182   }
183   /* restore defaults */
184 #if defined(PETSC_HAVE_MUMPS)
185   PetscCall(MatMumpsSetIcntl(ctx->F,26,-1));
186 #endif
187 #if defined(PETSC_HAVE_MKL_PARDISO)
188   PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0));
189 #endif
190   PetscFunctionReturn(0);
191 }
192 
193 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
194 {
195   PetscFunctionBegin;
196   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE));
197   PetscFunctionReturn(0);
198 }
199 
200 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
201 {
202   PetscFunctionBegin;
203   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
208 {
209   PetscFunctionBegin;
210   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE));
211   PetscFunctionReturn(0);
212 }
213 
214 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
215 {
216   PetscFunctionBegin;
217   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE));
218   PetscFunctionReturn(0);
219 }
220 
221 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
222 {
223   PCBDDCReuseSolvers ctx;
224   PetscBool          iascii;
225 
226   PetscFunctionBegin;
227   PetscCall(PCShellGetContext(pc,&ctx));
228   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
229   if (iascii) {
230     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO));
231   }
232   PetscCall(MatView(ctx->F,viewer));
233   if (iascii) {
234     PetscCall(PetscViewerPopFormat(viewer));
235   }
236   PetscFunctionReturn(0);
237 }
238 
239 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
240 {
241   PetscInt       i;
242 
243   PetscFunctionBegin;
244   PetscCall(MatDestroy(&reuse->F));
245   PetscCall(VecDestroy(&reuse->sol));
246   PetscCall(VecDestroy(&reuse->rhs));
247   PetscCall(PCDestroy(&reuse->interior_solver));
248   PetscCall(PCDestroy(&reuse->correction_solver));
249   PetscCall(ISDestroy(&reuse->is_R));
250   PetscCall(ISDestroy(&reuse->is_B));
251   PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
252   PetscCall(VecDestroy(&reuse->sol_B));
253   PetscCall(VecDestroy(&reuse->rhs_B));
254   for (i=0;i<reuse->benign_n;i++) {
255     PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
256   }
257   PetscCall(PetscFree(reuse->benign_zerodiag_subs));
258   PetscCall(PetscFree(reuse->benign_save_vals));
259   PetscCall(MatDestroy(&reuse->benign_csAIB));
260   PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
261   PetscCall(VecDestroy(&reuse->benign_corr_work));
262   PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
263   PetscFunctionReturn(0);
264 }
265 
266 static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc)
267 {
268   PCBDDCReuseSolvers ctx;
269 
270   PetscFunctionBegin;
271   PetscCall(PCShellGetContext(pc,&ctx));
272   PetscCall(PCBDDCReuseSolversReset(ctx));
273   PetscCall(PetscFree(ctx));
274   PetscCall(PCShellSetContext(pc,NULL));
275   PetscFunctionReturn(0);
276 }
277 
278 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
279 {
280   Mat            B, C, D, Bd, Cd, AinvBd;
281   KSP            ksp;
282   PC             pc;
283   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
284   PetscReal      fill = 2.0;
285   PetscInt       n_I;
286   PetscMPIInt    size;
287 
288   PetscFunctionBegin;
289   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&size));
290   PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
291   if (reuse == MAT_REUSE_MATRIX) {
292     PetscBool Sdense;
293 
294     PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
295     PetscCheck(Sdense,PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
296   }
297   PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
298   PetscCall(MatSchurComplementGetKSP(M, &ksp));
299   PetscCall(KSPGetPC(ksp, &pc));
300   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU));
301   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU));
302   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL));
303   PetscCall(PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense));
304   PetscCall(PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense));
305   PetscCall(MatGetSize(B,&n_I,NULL));
306   if (n_I) {
307     if (!Bdense) {
308       PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
309     } else {
310       Bd = B;
311     }
312 
313     if (isLU || isILU || isCHOL) {
314       Mat fact;
315       PetscCall(KSPSetUp(ksp));
316       PetscCall(PCFactorGetMatrix(pc, &fact));
317       PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
318       PetscCall(MatMatSolve(fact, Bd, AinvBd));
319     } else {
320       PetscBool ex = PETSC_TRUE;
321 
322       if (ex) {
323         Mat Ainvd;
324 
325         PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
326         PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
327         PetscCall(MatDestroy(&Ainvd));
328       } else {
329         Vec         sol,rhs;
330         PetscScalar *arrayrhs,*arraysol;
331         PetscInt    i,nrhs,n;
332 
333         PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
334         PetscCall(MatGetSize(Bd,&n,&nrhs));
335         PetscCall(MatDenseGetArray(Bd,&arrayrhs));
336         PetscCall(MatDenseGetArray(AinvBd,&arraysol));
337         PetscCall(KSPGetSolution(ksp,&sol));
338         PetscCall(KSPGetRhs(ksp,&rhs));
339         for (i=0;i<nrhs;i++) {
340           PetscCall(VecPlaceArray(rhs,arrayrhs+i*n));
341           PetscCall(VecPlaceArray(sol,arraysol+i*n));
342           PetscCall(KSPSolve(ksp,rhs,sol));
343           PetscCall(VecResetArray(rhs));
344           PetscCall(VecResetArray(sol));
345         }
346         PetscCall(MatDenseRestoreArray(Bd,&arrayrhs));
347         PetscCall(MatDenseRestoreArray(AinvBd,&arrayrhs));
348       }
349     }
350     if (!Bdense & !issym) {
351       PetscCall(MatDestroy(&Bd));
352     }
353 
354     if (!issym) {
355       if (!Cdense) {
356         PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
357       } else {
358         Cd = C;
359       }
360       PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
361       if (!Cdense) {
362         PetscCall(MatDestroy(&Cd));
363       }
364     } else {
365       PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
366       if (!Bdense) {
367         PetscCall(MatDestroy(&Bd));
368       }
369     }
370     PetscCall(MatDestroy(&AinvBd));
371   }
372 
373   if (D) {
374     Mat       Dd;
375     PetscBool Ddense;
376 
377     PetscCall(PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense));
378     if (!Ddense) {
379       PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
380     } else {
381       Dd = D;
382     }
383     if (n_I) {
384       PetscCall(MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN));
385     } else {
386       if (reuse == MAT_INITIAL_MATRIX) {
387         PetscCall(MatDuplicate(Dd,MAT_COPY_VALUES,S));
388       } else {
389         PetscCall(MatCopy(Dd,*S,SAME_NONZERO_PATTERN));
390       }
391     }
392     if (!Ddense) {
393       PetscCall(MatDestroy(&Dd));
394     }
395   } else {
396     PetscCall(MatScale(*S,-1.0));
397   }
398   PetscFunctionReturn(0);
399 }
400 
401 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
402 {
403   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
404   Mat                    S_all;
405   Vec                    gstash,lstash;
406   VecScatter             sstash;
407   IS                     is_I,is_I_layer;
408   IS                     all_subsets,all_subsets_mult,all_subsets_n;
409   PetscScalar            *stasharray,*Bwork;
410   PetscInt               *nnz,*all_local_idx_N;
411   PetscInt               *auxnum1,*auxnum2;
412   PetscInt               i,subset_size,max_subset_size;
413   PetscInt               n_B,extra,local_size,global_size;
414   PetscInt               local_stash_size;
415   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
416   MPI_Comm               comm_n;
417   PetscBool              deluxe = PETSC_TRUE;
418   PetscBool              use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
419   PetscViewer            matl_dbg_viewer = NULL;
420   PetscBool              flg;
421 
422   PetscFunctionBegin;
423   PetscCall(MatDestroy(&sub_schurs->A));
424   PetscCall(MatDestroy(&sub_schurs->S));
425   if (Ain) {
426     PetscCall(PetscObjectReference((PetscObject)Ain));
427     sub_schurs->A = Ain;
428   }
429 
430   PetscCall(PetscObjectReference((PetscObject)Sin));
431   sub_schurs->S = Sin;
432   if (sub_schurs->schur_explicit) {
433     sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
434   }
435 
436   /* preliminary checks */
437   PetscCheck(sub_schurs->schur_explicit || !compute_Stilda,PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
438 
439   if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
440 
441   /* debug (MATLAB) */
442   if (sub_schurs->debug) {
443     PetscMPIInt size,rank;
444     PetscInt    nr,*print_schurs_ranks,print_schurs = PETSC_FALSE;
445 
446     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size));
447     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
448     nr   = size;
449     PetscCall(PetscMalloc1(nr,&print_schurs_ranks));
450     PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
451     PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg));
452     if (!flg) print_schurs = PETSC_TRUE;
453     else {
454       print_schurs = PETSC_FALSE;
455       for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; }
456     }
457     PetscOptionsEnd();
458     PetscCall(PetscFree(print_schurs_ranks));
459     if (print_schurs) {
460       char filename[256];
461 
462       PetscCall(PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank));
463       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer));
464       PetscCall(PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB));
465     }
466   }
467 
468   /* restrict work on active processes */
469   if (sub_schurs->restrict_comm) {
470     PetscSubcomm subcomm;
471     PetscMPIInt  color,rank;
472 
473     color = 0;
474     if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
475     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
476     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm));
477     PetscCall(PetscSubcommSetNumber(subcomm,2));
478     PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank));
479     PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL));
480     PetscCall(PetscSubcommDestroy(&subcomm));
481     if (!sub_schurs->n_subs) {
482       PetscCall(PetscCommDestroy(&comm_n));
483       PetscFunctionReturn(0);
484     }
485   } else {
486     PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL));
487   }
488 
489   /* get Schur complement matrices */
490   if (!sub_schurs->schur_explicit) {
491     Mat       tA_IB,tA_BI,tA_BB;
492     PetscBool isseqsbaij;
493     PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB));
494     PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij));
495     if (isseqsbaij) {
496       PetscCall(MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB));
497       PetscCall(MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB));
498       PetscCall(MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI));
499     } else {
500       PetscCall(PetscObjectReference((PetscObject)tA_BB));
501       A_BB = tA_BB;
502       PetscCall(PetscObjectReference((PetscObject)tA_IB));
503       A_IB = tA_IB;
504       PetscCall(PetscObjectReference((PetscObject)tA_BI));
505       A_BI = tA_BI;
506     }
507   } else {
508     A_II = NULL;
509     A_IB = NULL;
510     A_BI = NULL;
511     A_BB = NULL;
512   }
513   S_all = NULL;
514 
515   /* determine interior problems */
516   PetscCall(ISGetLocalSize(sub_schurs->is_I,&i));
517   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
518     PetscBT                touched;
519     const PetscInt*        idx_B;
520     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
521 
522     PetscCheck(xadj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
523     /* get sizes */
524     PetscCall(ISGetLocalSize(sub_schurs->is_I,&n_I));
525     PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B));
526 
527     PetscCall(PetscMalloc1(n_I+n_B,&local_numbering));
528     PetscCall(PetscBTCreate(n_I+n_B,&touched));
529     PetscCall(PetscBTMemzero(n_I+n_B,touched));
530 
531     /* all boundary dofs must be skipped when adding layers */
532     PetscCall(ISGetIndices(sub_schurs->is_B,&idx_B));
533     for (j=0;j<n_B;j++) {
534       PetscCall(PetscBTSet(touched,idx_B[j]));
535     }
536     PetscCall(PetscArraycpy(local_numbering,idx_B,n_B));
537     PetscCall(ISRestoreIndices(sub_schurs->is_B,&idx_B));
538 
539     /* add prescribed number of layers of dofs */
540     n_local_dofs = n_B;
541     n_prev_added = n_B;
542     for (layer=0;layer<nlayers;layer++) {
543       PetscInt n_added = 0;
544       if (n_local_dofs == n_I+n_B) break;
545       PetscCheck(n_local_dofs <= n_I+n_B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")",layer,n_local_dofs,n_I+n_B);
546       PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added));
547       n_prev_added = n_added;
548       n_local_dofs += n_added;
549       if (!n_added) break;
550     }
551     PetscCall(PetscBTDestroy(&touched));
552 
553     /* IS for I layer dofs in original numbering */
554     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer));
555     PetscCall(PetscFree(local_numbering));
556     PetscCall(ISSort(is_I_layer));
557     /* IS for I layer dofs in I numbering */
558     if (!sub_schurs->schur_explicit) {
559       ISLocalToGlobalMapping ItoNmap;
560       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap));
561       PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I));
562       PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
563 
564       /* II block */
565       PetscCall(MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II));
566     }
567   } else {
568     PetscInt n_I;
569 
570     /* IS for I dofs in original numbering */
571     PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
572     is_I_layer = sub_schurs->is_I;
573 
574     /* IS for I dofs in I numbering (strided 1) */
575     if (!sub_schurs->schur_explicit) {
576       PetscCall(ISGetSize(sub_schurs->is_I,&n_I));
577       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I));
578 
579       /* II block is the same */
580       PetscCall(PetscObjectReference((PetscObject)A_II));
581       AE_II = A_II;
582     }
583   }
584 
585   /* Get info on subset sizes and sum of all subsets sizes */
586   max_subset_size = 0;
587   local_size = 0;
588   for (i=0;i<sub_schurs->n_subs;i++) {
589     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
590     max_subset_size = PetscMax(subset_size,max_subset_size);
591     local_size += subset_size;
592   }
593 
594   /* Work arrays for local indices */
595   extra = 0;
596   PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B));
597   if (sub_schurs->schur_explicit && is_I_layer) {
598     PetscCall(ISGetLocalSize(is_I_layer,&extra));
599   }
600   PetscCall(PetscMalloc1(n_B+extra,&all_local_idx_N));
601   if (extra) {
602     const PetscInt *idxs;
603     PetscCall(ISGetIndices(is_I_layer,&idxs));
604     PetscCall(PetscArraycpy(all_local_idx_N,idxs,extra));
605     PetscCall(ISRestoreIndices(is_I_layer,&idxs));
606   }
607   PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum1));
608   PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum2));
609 
610   /* Get local indices in local numbering */
611   local_size = 0;
612   local_stash_size = 0;
613   for (i=0;i<sub_schurs->n_subs;i++) {
614     const PetscInt *idxs;
615 
616     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
617     PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs));
618     /* start (smallest in global ordering) and multiplicity */
619     auxnum1[i] = idxs[0];
620     auxnum2[i] = subset_size*subset_size;
621     /* subset indices in local numbering */
622     PetscCall(PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size));
623     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs));
624     local_size += subset_size;
625     local_stash_size += subset_size*subset_size;
626   }
627 
628   /* allocate extra workspace needed only for GETRI or SYTRF */
629   use_potr = use_sytr = PETSC_FALSE;
630   if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
631     use_potr = PETSC_TRUE;
632   } else if (sub_schurs->is_symmetric) {
633     use_sytr = PETSC_TRUE;
634   }
635   if (local_size && !use_potr) {
636     PetscScalar  lwork,dummyscalar = 0.;
637     PetscBLASInt dummyint = 0;
638 
639     B_lwork = -1;
640     PetscCall(PetscBLASIntCast(local_size,&B_N));
641     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
642     if (use_sytr) {
643       PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
644       PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr);
645     } else {
646       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
647       PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
648     }
649     PetscCall(PetscFPTrapPop());
650     PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork));
651     PetscCall(PetscMalloc2(B_lwork,&Bwork,B_N,&pivots));
652   } else {
653     Bwork = NULL;
654     pivots = NULL;
655   }
656 
657   /* prepare data for summing up properly schurs on subsets */
658   PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n));
659   PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets));
660   PetscCall(ISDestroy(&all_subsets_n));
661   PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult));
662   PetscCall(ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n));
663   PetscCall(ISDestroy(&all_subsets));
664   PetscCall(ISDestroy(&all_subsets_mult));
665   PetscCall(ISGetLocalSize(all_subsets_n,&i));
666   PetscCheck(i == local_stash_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT,i,local_stash_size);
667   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash));
668   PetscCall(VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash));
669   PetscCall(VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash));
670   PetscCall(ISDestroy(&all_subsets_n));
671 
672   /* subset indices in local boundary numbering */
673   if (!sub_schurs->is_Ej_all) {
674     PetscInt *all_local_idx_B;
675 
676     PetscCall(PetscMalloc1(local_size,&all_local_idx_B));
677     PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B));
678     PetscCheck(subset_size == local_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT,subset_size,local_size);
679     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all));
680   }
681 
682   if (change) {
683     ISLocalToGlobalMapping BtoS;
684     IS                     change_primal_B;
685     IS                     change_primal_all;
686 
687     PetscCheck(!sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
688     PetscCheck(!sub_schurs->change,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
689     PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub));
690     for (i=0;i<sub_schurs->n_subs;i++) {
691       ISLocalToGlobalMapping NtoS;
692       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS));
693       PetscCall(ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i]));
694       PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
695     }
696     PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B));
697     PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS));
698     PetscCall(ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all));
699     PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
700     PetscCall(ISDestroy(&change_primal_B));
701     PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change));
702     for (i=0;i<sub_schurs->n_subs;i++) {
703       Mat change_sub;
704 
705       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
706       PetscCall(KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i]));
707       PetscCall(KSPSetType(sub_schurs->change[i],KSPPREONLY));
708       if (!sub_schurs->change_with_qr) {
709         PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub));
710       } else {
711         Mat change_subt;
712         PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt));
713         PetscCall(MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub));
714         PetscCall(MatDestroy(&change_subt));
715       }
716       PetscCall(KSPSetOperators(sub_schurs->change[i],change_sub,change_sub));
717       PetscCall(MatDestroy(&change_sub));
718       PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix));
719       PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_"));
720     }
721     PetscCall(ISDestroy(&change_primal_all));
722   }
723 
724   /* Local matrix of all local Schur on subsets (transposed) */
725   if (!sub_schurs->S_Ej_all) {
726     Mat         T;
727     PetscScalar *v;
728     PetscInt    *ii,*jj;
729     PetscInt    cum,i,j,k;
730 
731     /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
732        Allocate properly a representative matrix and duplicate */
733     PetscCall(PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v));
734     ii[0] = 0;
735     cum   = 0;
736     for (i=0;i<sub_schurs->n_subs;i++) {
737       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
738       for (j=0;j<subset_size;j++) {
739         const PetscInt row = cum+j;
740         PetscInt col = cum;
741 
742         ii[row+1] = ii[row] + subset_size;
743         for (k=ii[row];k<ii[row+1];k++) {
744           jj[k] = col;
745           col++;
746         }
747       }
748       cum += subset_size;
749     }
750     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T));
751     PetscCall(MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all));
752     PetscCall(MatDestroy(&T));
753     PetscCall(PetscFree3(ii,jj,v));
754   }
755   /* matrices for deluxe scaling and adaptive selection */
756   if (compute_Stilda) {
757     if (!sub_schurs->sum_S_Ej_tilda_all) {
758       PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all));
759     }
760     if (!sub_schurs->sum_S_Ej_inv_all && deluxe) {
761       PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all));
762     }
763   }
764 
765   /* Compute Schur complements explicitly */
766   F = NULL;
767   if (!sub_schurs->schur_explicit) {
768     /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
769        it is not efficient, unless the economic version of the scaling is used */
770     Mat         S_Ej_expl;
771     PetscScalar *work;
772     PetscInt    j,*dummy_idx;
773     PetscBool   Sdense;
774 
775     PetscCall(PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work));
776     local_size = 0;
777     for (i=0;i<sub_schurs->n_subs;i++) {
778       IS  is_subset_B;
779       Mat AE_EE,AE_IE,AE_EI,S_Ej;
780 
781       /* subsets in original and boundary numbering */
782       PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B));
783       /* EE block */
784       PetscCall(MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE));
785       /* IE block */
786       PetscCall(MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE));
787       /* EI block */
788       if (sub_schurs->is_symmetric) {
789         PetscCall(MatCreateTranspose(AE_IE,&AE_EI));
790       } else if (sub_schurs->is_hermitian) {
791         PetscCall(MatCreateHermitianTranspose(AE_IE,&AE_EI));
792       } else {
793         PetscCall(MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI));
794       }
795       PetscCall(ISDestroy(&is_subset_B));
796       PetscCall(MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej));
797       PetscCall(MatDestroy(&AE_EE));
798       PetscCall(MatDestroy(&AE_IE));
799       PetscCall(MatDestroy(&AE_EI));
800       if (AE_II == A_II) { /* we can reuse the same ksp */
801         KSP ksp;
802         PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&ksp));
803         PetscCall(MatSchurComplementSetKSP(S_Ej,ksp));
804       } else { /* build new ksp object which inherits ksp and pc types from the original one */
805         KSP       origksp,schurksp;
806         PC        origpc,schurpc;
807         KSPType   ksp_type;
808         PetscInt  n_internal;
809         PetscBool ispcnone;
810 
811         PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&origksp));
812         PetscCall(MatSchurComplementGetKSP(S_Ej,&schurksp));
813         PetscCall(KSPGetType(origksp,&ksp_type));
814         PetscCall(KSPSetType(schurksp,ksp_type));
815         PetscCall(KSPGetPC(schurksp,&schurpc));
816         PetscCall(KSPGetPC(origksp,&origpc));
817         PetscCall(PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone));
818         if (!ispcnone) {
819           PCType pc_type;
820           PetscCall(PCGetType(origpc,&pc_type));
821           PetscCall(PCSetType(schurpc,pc_type));
822         } else {
823           PetscCall(PCSetType(schurpc,PCLU));
824         }
825         PetscCall(ISGetSize(is_I,&n_internal));
826         if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
827           MatSolverType solver = NULL;
828           PetscCall(PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver));
829           if (solver) {
830             PetscCall(PCFactorSetMatSolverType(schurpc,solver));
831           }
832         }
833         PetscCall(KSPSetUp(schurksp));
834       }
835       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
836       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl));
837       PetscCall(PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl));
838       PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense));
839       if (Sdense) {
840         for (j=0;j<subset_size;j++) {
841           dummy_idx[j]=local_size+j;
842         }
843         PetscCall(MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES));
844       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
845       PetscCall(MatDestroy(&S_Ej));
846       PetscCall(MatDestroy(&S_Ej_expl));
847       local_size += subset_size;
848     }
849     PetscCall(PetscFree2(dummy_idx,work));
850     /* free */
851     PetscCall(ISDestroy(&is_I));
852     PetscCall(MatDestroy(&AE_II));
853     PetscCall(PetscFree(all_local_idx_N));
854   } else {
855     Mat               A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL;
856     Mat               *gdswA;
857     Vec               Dall = NULL;
858     IS                is_A_all,*is_p_r = NULL;
859     MatType           Stype;
860     PetscScalar       *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL;
861     PetscScalar       *SEj_arr = NULL,*SEjinv_arr = NULL;
862     const PetscScalar *rS_data;
863     PetscInt          n,n_I,size_schur,size_active_schur,cum,cum2;
864     PetscBool         economic,solver_S,S_lower_triangular = PETSC_FALSE;
865     PetscBool         schur_has_vertices,factor_workaround;
866     PetscBool         use_cholesky;
867 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
868     PetscBool         oldpin;
869 #endif
870 
871     /* get sizes */
872     n_I = 0;
873     if (is_I_layer) {
874       PetscCall(ISGetLocalSize(is_I_layer,&n_I));
875     }
876     economic = PETSC_FALSE;
877     PetscCall(ISGetLocalSize(sub_schurs->is_I,&cum));
878     if (cum != n_I) economic = PETSC_TRUE;
879     PetscCall(MatGetLocalSize(sub_schurs->A,&n,NULL));
880     size_active_schur = local_size;
881 
882     /* import scaling vector (wrong formulation if we have 3D edges) */
883     if (scaling && compute_Stilda) {
884       const PetscScalar *array;
885       PetscScalar       *array2;
886       const PetscInt    *idxs;
887       PetscInt          i;
888 
889       PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs));
890       PetscCall(VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall));
891       PetscCall(VecGetArrayRead(scaling,&array));
892       PetscCall(VecGetArray(Dall,&array2));
893       for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]];
894       PetscCall(VecRestoreArray(Dall,&array2));
895       PetscCall(VecRestoreArrayRead(scaling,&array));
896       PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs));
897       deluxe = PETSC_FALSE;
898     }
899 
900     /* size active schurs does not count any dirichlet or vertex dof on the interface */
901     factor_workaround = PETSC_FALSE;
902     schur_has_vertices = PETSC_FALSE;
903     cum = n_I+size_active_schur;
904     if (sub_schurs->is_dir) {
905       const PetscInt* idxs;
906       PetscInt        n_dir;
907 
908       PetscCall(ISGetLocalSize(sub_schurs->is_dir,&n_dir));
909       PetscCall(ISGetIndices(sub_schurs->is_dir,&idxs));
910       PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_dir));
911       PetscCall(ISRestoreIndices(sub_schurs->is_dir,&idxs));
912       cum += n_dir;
913       if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
914     }
915     /* include the primal vertices in the Schur complement */
916     if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
917       PetscInt n_v;
918 
919       PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
920       if (n_v) {
921         const PetscInt* idxs;
922 
923         PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs));
924         PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_v));
925         PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs));
926         cum += n_v;
927         if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
928         schur_has_vertices = PETSC_TRUE;
929       }
930     }
931     size_schur = cum - n_I;
932     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all));
933 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
934     oldpin = sub_schurs->A->boundtocpu;
935     PetscCall(MatBindToCPU(sub_schurs->A,PETSC_TRUE));
936 #endif
937     if (cum == n) {
938       PetscCall(ISSetPermutation(is_A_all));
939       PetscCall(MatPermute(sub_schurs->A,is_A_all,is_A_all,&A));
940     } else {
941       PetscCall(MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A));
942     }
943 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
944     PetscCall(MatBindToCPU(sub_schurs->A,oldpin));
945 #endif
946     PetscCall(MatSetOptionsPrefix(A,sub_schurs->prefix));
947     PetscCall(MatAppendOptionsPrefix(A,"sub_schurs_"));
948 
949     /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
950        this is a workaround */
951     if (benign_n) {
952       Vec                    v,benign_AIIm1_ones;
953       ISLocalToGlobalMapping N_to_reor;
954       IS                     is_p0,is_p0_p;
955       PetscScalar            *cs_AIB,*AIIm1_data;
956       PetscInt               sizeA;
957 
958       PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor));
959       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0));
960       PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p));
961       PetscCall(ISDestroy(&is_p0));
962       PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones));
963       PetscCall(VecGetSize(v,&sizeA));
964       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat));
965       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat));
966       PetscCall(MatDenseGetArray(cs_AIB_mat,&cs_AIB));
967       PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
968       PetscCall(PetscMalloc1(benign_n,&is_p_r));
969       /* compute colsum of A_IB restricted to pressures */
970       for (i=0;i<benign_n;i++) {
971         const PetscScalar *array;
972         const PetscInt    *idxs;
973         PetscInt          j,nz;
974 
975         PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i]));
976         PetscCall(ISGetLocalSize(is_p_r[i],&nz));
977         PetscCall(ISGetIndices(is_p_r[i],&idxs));
978         for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.;
979         PetscCall(ISRestoreIndices(is_p_r[i],&idxs));
980         PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
981         PetscCall(MatMult(A,benign_AIIm1_ones,v));
982         PetscCall(VecResetArray(benign_AIIm1_ones));
983         PetscCall(VecGetArrayRead(v,&array));
984         for (j=0;j<size_schur;j++) {
985 #if defined(PETSC_USE_COMPLEX)
986           cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz));
987 #else
988           cs_AIB[i*size_schur + j] = array[j+n_I]/nz;
989 #endif
990         }
991         PetscCall(VecRestoreArrayRead(v,&array));
992       }
993       PetscCall(MatDenseRestoreArray(cs_AIB_mat,&cs_AIB));
994       PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
995       PetscCall(VecDestroy(&v));
996       PetscCall(VecDestroy(&benign_AIIm1_ones));
997       PetscCall(MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE));
998       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
999       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
1000       PetscCall(MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL));
1001       PetscCall(ISDestroy(&is_p0_p));
1002       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
1003     }
1004     PetscCall(MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric));
1005     PetscCall(MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian));
1006     PetscCall(MatSetOption(A,MAT_SPD,sub_schurs->is_posdef));
1007 
1008     /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
1009     use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
1010 
1011     /* when using the benign subspace trick, the local Schur complements are SPD */
1012     /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
1013        Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
1014     if (benign_trick) {
1015       sub_schurs->is_posdef = PETSC_TRUE;
1016       PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg));
1017       if (flg) use_cholesky = PETSC_FALSE;
1018     }
1019 
1020     if (n_I) {
1021       IS        is_schur;
1022       char      stype[64];
1023       PetscBool gpu = PETSC_FALSE;
1024 
1025       if (use_cholesky) {
1026         PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F));
1027       } else {
1028         PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F));
1029       }
1030       PetscCall(MatSetErrorIfFailure(A,PETSC_TRUE));
1031 #if defined(PETSC_HAVE_MKL_PARDISO)
1032       if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F,10,10));
1033 #endif
1034       /* subsets ordered last */
1035       PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur));
1036       PetscCall(MatFactorSetSchurIS(F,is_schur));
1037       PetscCall(ISDestroy(&is_schur));
1038 
1039       /* factorization step */
1040       if (use_cholesky) {
1041         PetscCall(MatCholeskyFactorSymbolic(F,A,NULL,NULL));
1042 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1043         PetscCall(MatMumpsSetIcntl(F,19,2));
1044 #endif
1045         PetscCall(MatCholeskyFactorNumeric(F,A,NULL));
1046         S_lower_triangular = PETSC_TRUE;
1047       } else {
1048         PetscCall(MatLUFactorSymbolic(F,A,NULL,NULL,NULL));
1049 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1050         PetscCall(MatMumpsSetIcntl(F,19,3));
1051 #endif
1052         PetscCall(MatLUFactorNumeric(F,A,NULL));
1053       }
1054       PetscCall(MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view"));
1055 
1056       if (matl_dbg_viewer) {
1057         Mat S;
1058         IS  is;
1059 
1060         PetscCall(PetscObjectSetName((PetscObject)A,"A"));
1061         PetscCall(MatView(A,matl_dbg_viewer));
1062         PetscCall(MatFactorCreateSchurComplement(F,&S,NULL));
1063         PetscCall(PetscObjectSetName((PetscObject)S,"S"));
1064         PetscCall(MatView(S,matl_dbg_viewer));
1065         PetscCall(MatDestroy(&S));
1066         PetscCall(ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is));
1067         PetscCall(PetscObjectSetName((PetscObject)is,"I"));
1068         PetscCall(ISView(is,matl_dbg_viewer));
1069         PetscCall(ISDestroy(&is));
1070         PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is));
1071         PetscCall(PetscObjectSetName((PetscObject)is,"B"));
1072         PetscCall(ISView(is,matl_dbg_viewer));
1073         PetscCall(ISDestroy(&is));
1074         PetscCall(PetscObjectSetName((PetscObject)is_A_all,"IA"));
1075         PetscCall(ISView(is_A_all,matl_dbg_viewer));
1076         for (i=0,cum=0;i<sub_schurs->n_subs;i++) {
1077           IS   is;
1078           char name[16];
1079 
1080           PetscCall(PetscSNPrintf(name,sizeof(name),"IE%" PetscInt_FMT,i));
1081           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1082           PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is));
1083           PetscCall(PetscObjectSetName((PetscObject)is,name));
1084           PetscCall(ISView(is,matl_dbg_viewer));
1085           PetscCall(ISDestroy(&is));
1086           if (sub_schurs->change) {
1087             Mat T;
1088 
1089             PetscCall(PetscSNPrintf(name,sizeof(name),"TE%" PetscInt_FMT,i));
1090             PetscCall(KSPGetOperators(sub_schurs->change[i],&T,NULL));
1091             PetscCall(PetscObjectSetName((PetscObject)T,name));
1092             PetscCall(MatView(T,matl_dbg_viewer));
1093             PetscCall(PetscSNPrintf(name,sizeof(name),"ITE%" PetscInt_FMT,i));
1094             PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i],name));
1095             PetscCall(ISView(sub_schurs->change_primal_sub[i],matl_dbg_viewer));
1096           }
1097           cum += subset_size;
1098         }
1099         PetscCall(PetscViewerFlush(matl_dbg_viewer));
1100       }
1101 
1102       /* get explicit Schur Complement computed during numeric factorization */
1103       PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1104       PetscCall(PetscStrncpy(stype,MATSEQDENSE,sizeof(stype)));
1105 #if defined(PETSC_HAVE_CUDA)
1106       PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,""));
1107 #endif
1108       if (gpu) {
1109         PetscCall(PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype)));
1110       }
1111       PetscCall(PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL));
1112       PetscCall(MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all));
1113       PetscCall(MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef));
1114       PetscCall(MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian));
1115       PetscCall(MatGetType(S_all,&Stype));
1116 
1117       /* we can reuse the solvers if we are not using the economic version */
1118       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1119       if (!sub_schurs->gdsw) {
1120         factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1121         if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur)
1122           reuse_solvers = factor_workaround = PETSC_FALSE;
1123       }
1124       solver_S = PETSC_TRUE;
1125 
1126       /* update the Schur complement with the change of basis on the pressures */
1127       if (benign_n) {
1128         const PetscScalar *cs_AIB;
1129         PetscScalar       *S_data,*AIIm1_data;
1130         Mat               S2 = NULL,S3 = NULL; /* dbg */
1131         PetscScalar       *S2_data,*S3_data; /* dbg */
1132         Vec               v,benign_AIIm1_ones;
1133         PetscInt          sizeA;
1134 
1135         PetscCall(MatDenseGetArray(S_all,&S_data));
1136         PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones));
1137         PetscCall(VecGetSize(v,&sizeA));
1138 #if defined(PETSC_HAVE_MUMPS)
1139         PetscCall(MatMumpsSetIcntl(F,26,0));
1140 #endif
1141 #if defined(PETSC_HAVE_MKL_PARDISO)
1142         PetscCall(MatMkl_PardisoSetCntl(F,70,1));
1143 #endif
1144         PetscCall(MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB));
1145         PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
1146         if (matl_dbg_viewer) {
1147           PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2));
1148           PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3));
1149           PetscCall(MatDenseGetArray(S2,&S2_data));
1150           PetscCall(MatDenseGetArray(S3,&S3_data));
1151         }
1152         for (i=0;i<benign_n;i++) {
1153           PetscScalar    *array,sum = 0.,one = 1.,*sums;
1154           const PetscInt *idxs;
1155           PetscInt       k,j,nz;
1156           PetscBLASInt   B_k,B_n;
1157 
1158           PetscCall(PetscCalloc1(benign_n,&sums));
1159           PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
1160           PetscCall(VecCopy(benign_AIIm1_ones,v));
1161           PetscCall(MatSolve(F,v,benign_AIIm1_ones));
1162           PetscCall(MatMult(A,benign_AIIm1_ones,v));
1163           PetscCall(VecResetArray(benign_AIIm1_ones));
1164           /* p0 dofs (eliminated) are excluded from the sums */
1165           for (k=0;k<benign_n;k++) {
1166             PetscCall(ISGetLocalSize(is_p_r[k],&nz));
1167             PetscCall(ISGetIndices(is_p_r[k],&idxs));
1168             for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i];
1169             PetscCall(ISRestoreIndices(is_p_r[k],&idxs));
1170           }
1171           PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array));
1172           if (matl_dbg_viewer) {
1173             Vec  vv;
1174             char name[16];
1175 
1176             PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv));
1177             PetscCall(PetscSNPrintf(name,sizeof(name),"Pvs%" PetscInt_FMT,i));
1178             PetscCall(PetscObjectSetName((PetscObject)vv,name));
1179             PetscCall(VecView(vv,matl_dbg_viewer));
1180           }
1181           /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1182           /* cs_AIB already scaled by 1./nz */
1183           B_k = 1;
1184           for (k=0;k<benign_n;k++) {
1185             sum  = sums[k];
1186             PetscCall(PetscBLASIntCast(size_schur,&B_n));
1187 
1188             if (PetscAbsScalar(sum) == 0.0) continue;
1189             if (k == i) {
1190               PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1191               if (matl_dbg_viewer) {
1192                 PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1193               }
1194             } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1195               sum /= 2.0;
1196               PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1197               if (matl_dbg_viewer) {
1198                 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1199               }
1200             }
1201           }
1202           sum  = 1.;
1203           PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1204           if (matl_dbg_viewer) {
1205             PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n));
1206           }
1207           PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array));
1208           /* set p0 entry of AIIm1_ones to zero */
1209           PetscCall(ISGetLocalSize(is_p_r[i],&nz));
1210           PetscCall(ISGetIndices(is_p_r[i],&idxs));
1211           for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.;
1212           PetscCall(ISRestoreIndices(is_p_r[i],&idxs));
1213           PetscCall(PetscFree(sums));
1214         }
1215         PetscCall(VecDestroy(&benign_AIIm1_ones));
1216         if (matl_dbg_viewer) {
1217           PetscCall(MatDenseRestoreArray(S2,&S2_data));
1218           PetscCall(MatDenseRestoreArray(S3,&S3_data));
1219         }
1220         if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1221           PetscInt k,j;
1222           for (k=0;k<size_schur;k++) {
1223             for (j=k;j<size_schur;j++) {
1224               S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]);
1225             }
1226           }
1227         }
1228 
1229         /* restore defaults */
1230 #if defined(PETSC_HAVE_MUMPS)
1231         PetscCall(MatMumpsSetIcntl(F,26,-1));
1232 #endif
1233 #if defined(PETSC_HAVE_MKL_PARDISO)
1234         PetscCall(MatMkl_PardisoSetCntl(F,70,0));
1235 #endif
1236         PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB));
1237         PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
1238         PetscCall(VecDestroy(&v));
1239         PetscCall(MatDenseRestoreArray(S_all,&S_data));
1240         if (matl_dbg_viewer) {
1241           Mat S;
1242 
1243           PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1244           PetscCall(MatFactorCreateSchurComplement(F,&S,NULL));
1245           PetscCall(PetscObjectSetName((PetscObject)S,"Sb"));
1246           PetscCall(MatView(S,matl_dbg_viewer));
1247           PetscCall(MatDestroy(&S));
1248           PetscCall(PetscObjectSetName((PetscObject)S2,"S2P"));
1249           PetscCall(MatView(S2,matl_dbg_viewer));
1250           PetscCall(PetscObjectSetName((PetscObject)S3,"S3P"));
1251           PetscCall(MatView(S3,matl_dbg_viewer));
1252           PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat,"cs"));
1253           PetscCall(MatView(cs_AIB_mat,matl_dbg_viewer));
1254           PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1255         }
1256         PetscCall(MatDestroy(&S2));
1257         PetscCall(MatDestroy(&S3));
1258       }
1259       if (!reuse_solvers) {
1260         for (i=0;i<benign_n;i++) {
1261           PetscCall(ISDestroy(&is_p_r[i]));
1262         }
1263         PetscCall(PetscFree(is_p_r));
1264         PetscCall(MatDestroy(&cs_AIB_mat));
1265         PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1266       }
1267     } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1268       PetscCall(MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all));
1269       PetscCall(MatGetType(S_all,&Stype));
1270       reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1271       factor_workaround = PETSC_FALSE;
1272       solver_S = PETSC_FALSE;
1273     }
1274 
1275     if (reuse_solvers) {
1276       Mat                A_II,pA_II,Afake;
1277       Vec                vec1_B;
1278       PCBDDCReuseSolvers msolv_ctx;
1279       PetscInt           n_R;
1280 
1281       if (sub_schurs->reuse_solver) {
1282         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1283       } else {
1284         PetscCall(PetscNew(&sub_schurs->reuse_solver));
1285       }
1286       msolv_ctx = sub_schurs->reuse_solver;
1287       PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,&pA_II,NULL,NULL,NULL));
1288       PetscCall(PetscObjectReference((PetscObject)F));
1289       msolv_ctx->F = F;
1290       PetscCall(MatCreateVecs(F,&msolv_ctx->sol,NULL));
1291       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1292       {
1293         PetscScalar *array;
1294         PetscInt    n;
1295 
1296         PetscCall(VecGetLocalSize(msolv_ctx->sol,&n));
1297         PetscCall(VecGetArray(msolv_ctx->sol,&array));
1298         PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs));
1299         PetscCall(VecRestoreArray(msolv_ctx->sol,&array));
1300       }
1301       msolv_ctx->has_vertices = schur_has_vertices;
1302 
1303       /* interior solver */
1304       PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver));
1305       PetscCall(PCSetOperators(msolv_ctx->interior_solver,A_II,pA_II));
1306       PetscCall(PCSetType(msolv_ctx->interior_solver,PCSHELL));
1307       PetscCall(PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)"));
1308       PetscCall(PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx));
1309       PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1310       PetscCall(PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior));
1311       PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose));
1312       if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Destroy));
1313 
1314       /* correction solver */
1315       if (!sub_schurs->gdsw) {
1316         PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver));
1317         PetscCall(PCSetType(msolv_ctx->correction_solver,PCSHELL));
1318         PetscCall(PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)"));
1319         PetscCall(PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx));
1320         PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1321         PetscCall(PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction));
1322         PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose));
1323 
1324         /* scatter and vecs for Schur complement solver */
1325         PetscCall(MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B));
1326         PetscCall(MatCreateVecs(sub_schurs->S,&vec1_B,NULL));
1327         if (!schur_has_vertices) {
1328           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B));
1329           PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B));
1330           PetscCall(PetscObjectReference((PetscObject)is_A_all));
1331           msolv_ctx->is_R = is_A_all;
1332         } else {
1333           IS              is_B_all;
1334           const PetscInt* idxs;
1335           PetscInt        dual,n_v,n;
1336 
1337           PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
1338           dual = size_schur - n_v;
1339           PetscCall(ISGetLocalSize(is_A_all,&n));
1340           PetscCall(ISGetIndices(is_A_all,&idxs));
1341           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all));
1342           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B));
1343           PetscCall(ISDestroy(&is_B_all));
1344           PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all));
1345           PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B));
1346           PetscCall(ISDestroy(&is_B_all));
1347           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R));
1348           PetscCall(ISRestoreIndices(is_A_all,&idxs));
1349         }
1350         PetscCall(ISGetLocalSize(msolv_ctx->is_R,&n_R));
1351         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake));
1352         PetscCall(MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY));
1353         PetscCall(MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY));
1354         PetscCall(PCSetOperators(msolv_ctx->correction_solver,Afake,Afake));
1355         PetscCall(MatDestroy(&Afake));
1356         PetscCall(VecDestroy(&vec1_B));
1357       }
1358       /* communicate benign info to solver context */
1359       if (benign_n) {
1360         PetscScalar *array;
1361 
1362         msolv_ctx->benign_n = benign_n;
1363         msolv_ctx->benign_zerodiag_subs = is_p_r;
1364         PetscCall(PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals));
1365         msolv_ctx->benign_csAIB = cs_AIB_mat;
1366         PetscCall(MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL));
1367         PetscCall(VecGetArray(msolv_ctx->benign_corr_work,&array));
1368         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec));
1369         PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work,&array));
1370         msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1371       }
1372     } else {
1373       if (sub_schurs->reuse_solver) {
1374         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1375       }
1376       PetscCall(PetscFree(sub_schurs->reuse_solver));
1377     }
1378     PetscCall(MatDestroy(&A));
1379     PetscCall(ISDestroy(&is_A_all));
1380 
1381     /* Work arrays */
1382     PetscCall(PetscMalloc1(max_subset_size*max_subset_size,&work));
1383 
1384     /* S_Ej_all */
1385     cum = cum2 = 0;
1386     PetscCall(MatDenseGetArrayRead(S_all,&rS_data));
1387     PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr));
1388     if (sub_schurs->sum_S_Ej_inv_all) {
1389       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
1390     }
1391     if (sub_schurs->gdsw) {
1392       PetscCall(MatCreateSubMatrices(sub_schurs->A,sub_schurs->n_subs,sub_schurs->is_subs,sub_schurs->is_subs,MAT_INITIAL_MATRIX,&gdswA));
1393     }
1394     for (i=0;i<sub_schurs->n_subs;i++) {
1395       PetscInt j;
1396 
1397       /* get S_E (or K^i_EE for GDSW) */
1398       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1399       if (sub_schurs->gdsw) {
1400         Mat T;
1401 
1402         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&T));
1403         PetscCall(MatConvert(gdswA[i],MATDENSE,MAT_REUSE_MATRIX,&T));
1404         PetscCall(MatDestroy(&T));
1405       } else {
1406         if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1407           PetscInt k;
1408           for (k=0;k<subset_size;k++) {
1409             for (j=k;j<subset_size;j++) {
1410               work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1411               work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]);
1412             }
1413           }
1414         } else { /* just copy to workspace */
1415           PetscInt k;
1416           for (k=0;k<subset_size;k++) {
1417             for (j=0;j<subset_size;j++) {
1418               work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1419             }
1420           }
1421         }
1422       }
1423       /* insert S_E values */
1424       if (sub_schurs->change) {
1425         Mat change_sub,SEj,T;
1426 
1427         /* change basis */
1428         PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1429         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1430         if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1431           Mat T2;
1432           PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1433           PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1434           PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
1435           PetscCall(MatDestroy(&T2));
1436         } else {
1437           PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1438         }
1439         PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1440         PetscCall(MatDestroy(&T));
1441         PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL));
1442         PetscCall(MatDestroy(&SEj));
1443       }
1444       PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
1445       if (compute_Stilda) {
1446         if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
1447           Mat               M;
1448           const PetscScalar *vals;
1449           PetscBool         isdense,isdensecuda;
1450 
1451           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M));
1452           PetscCall(MatSetOption(M,MAT_SPD,sub_schurs->is_posdef));
1453           PetscCall(MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian));
1454           if (!PetscBTLookup(sub_schurs->is_edge,i)) {
1455             PetscCall(MatSetType(M,Stype));
1456           }
1457           PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense));
1458           PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda));
1459           if (use_cholesky) {
1460             PetscCall(MatCholeskyFactor(M,NULL,NULL));
1461           } else {
1462             PetscCall(MatLUFactor(M,NULL,NULL,NULL));
1463           }
1464           if (isdense) {
1465             PetscCall(MatSeqDenseInvertFactors_Private(M));
1466 #if defined(PETSC_HAVE_CUDA)
1467           } else if (isdensecuda) {
1468             PetscCall(MatSeqDenseCUDAInvertFactors_Private(M));
1469 #endif
1470           } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype);
1471           PetscCall(MatDenseGetArrayRead(M,&vals));
1472           PetscCall(PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size));
1473           PetscCall(MatDenseRestoreArrayRead(M,&vals));
1474           PetscCall(MatDestroy(&M));
1475         } else if (scaling) { /* not using deluxe */
1476           Mat         SEj;
1477           Vec         D;
1478           PetscScalar *array;
1479 
1480           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1481           PetscCall(VecGetArray(Dall,&array));
1482           PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D));
1483           PetscCall(VecRestoreArray(Dall,&array));
1484           PetscCall(VecShift(D,-1.));
1485           PetscCall(MatDiagonalScale(SEj,D,D));
1486           PetscCall(MatDestroy(&SEj));
1487           PetscCall(VecDestroy(&D));
1488           PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
1489         }
1490       }
1491       cum += subset_size;
1492       cum2 += subset_size*(size_schur + 1);
1493       SEj_arr += subset_size*subset_size;
1494       if (SEjinv_arr) SEjinv_arr += subset_size*subset_size;
1495     }
1496     if (sub_schurs->gdsw) {
1497       PetscCall(MatDestroySubMatrices(sub_schurs->n_subs,&gdswA));
1498     }
1499     PetscCall(MatDenseRestoreArrayRead(S_all,&rS_data));
1500     PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr));
1501     if (sub_schurs->sum_S_Ej_inv_all) {
1502       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
1503     }
1504     if (solver_S) {
1505       PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1506     }
1507 
1508     /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1509        however, preliminary tests indicate using GPUs is still faster in the solve phase */
1510 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1511     if (reuse_solvers) {
1512       Mat                  St;
1513       MatFactorSchurStatus st;
1514 
1515       flg  = PETSC_FALSE;
1516       PetscCall(PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL));
1517       PetscCall(MatFactorGetSchurComplement(F,&St,&st));
1518       PetscCall(MatBindToCPU(St,flg));
1519       PetscCall(MatFactorRestoreSchurComplement(F,&St,st));
1520     }
1521 #endif
1522 
1523     schur_factor = NULL;
1524     if (compute_Stilda && size_active_schur) {
1525 
1526       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1527         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1528         PetscCall(PetscArraycpy(SEjinv_arr,work,size_schur*size_schur));
1529         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1530       } else {
1531         Mat S_all_inv=NULL;
1532 
1533         if (solver_S && !sub_schurs->gdsw) {
1534           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1535              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1536           if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1537             PetscScalar *data;
1538             PetscInt     nd = 0;
1539 
1540             if (!use_potr) {
1541               SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1542             }
1543             PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1544             PetscCall(MatDenseGetArray(S_all_inv,&data));
1545             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1546               PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1547             }
1548 
1549             /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1550             if (schur_has_vertices) {
1551               Mat          M;
1552               PetscScalar *tdata;
1553               PetscInt     nv = 0, news;
1554 
1555               PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv));
1556               news = size_active_schur + nv;
1557               PetscCall(PetscCalloc1(news*news,&tdata));
1558               for (i=0;i<size_active_schur;i++) {
1559                 PetscCall(PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i));
1560                 PetscCall(PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv));
1561               }
1562               for (i=0;i<nv;i++) {
1563                 PetscInt k = i+size_active_schur;
1564                 PetscCall(PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i));
1565               }
1566 
1567               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M));
1568               PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1569               PetscCall(MatCholeskyFactor(M,NULL,NULL));
1570               /* save the factors */
1571               cum = 0;
1572               PetscCall(PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor));
1573               for (i=0;i<size_active_schur;i++) {
1574                 PetscCall(PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i));
1575                 cum += size_active_schur - i;
1576               }
1577               for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)]));
1578               PetscCall(MatSeqDenseInvertFactors_Private(M));
1579               /* move back just the active dofs to the Schur complement */
1580               for (i=0;i<size_active_schur;i++) {
1581                 PetscCall(PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur));
1582               }
1583               PetscCall(PetscFree(tdata));
1584               PetscCall(MatDestroy(&M));
1585             } else { /* we can factorize and invert just the activedofs */
1586               Mat         M;
1587               PetscScalar *aux;
1588 
1589               PetscCall(PetscMalloc1(nd,&aux));
1590               for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)];
1591               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M));
1592               PetscCall(MatDenseSetLDA(M,size_schur));
1593               PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1594               PetscCall(MatCholeskyFactor(M,NULL,NULL));
1595               PetscCall(MatSeqDenseInvertFactors_Private(M));
1596               PetscCall(MatDestroy(&M));
1597               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M));
1598               PetscCall(MatZeroEntries(M));
1599               PetscCall(MatDestroy(&M));
1600               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M));
1601               PetscCall(MatDenseSetLDA(M,size_schur));
1602               PetscCall(MatZeroEntries(M));
1603               PetscCall(MatDestroy(&M));
1604               for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i];
1605               PetscCall(PetscFree(aux));
1606             }
1607             PetscCall(MatDenseRestoreArray(S_all_inv,&data));
1608           } else { /* use MatFactor calls to invert S */
1609             PetscCall(MatFactorInvertSchurComplement(F));
1610             PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1611           }
1612         } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1613           PetscCall(PetscObjectReference((PetscObject)S_all));
1614           S_all_inv = S_all;
1615           PetscCall(MatDenseGetArray(S_all_inv,&S_data));
1616           PetscCall(PetscBLASIntCast(size_schur,&B_N));
1617           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1618           if (use_potr) {
1619             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
1620             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1621             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
1622             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1623           } else if (use_sytr) {
1624             PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1625             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1626             PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr));
1627             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1628           } else {
1629             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
1630             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1631             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1632             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1633           }
1634           PetscCall(PetscLogFlops(1.0*size_schur*size_schur*size_schur));
1635           PetscCall(PetscFPTrapPop());
1636           PetscCall(MatDenseRestoreArray(S_all_inv,&S_data));
1637         } else if (sub_schurs->gdsw) {
1638           Mat      tS, tX, SEj, S_II, S_IE, S_EE;
1639           KSP      pS_II;
1640           PC       pS_II_pc;
1641           IS       EE, II;
1642           PetscInt nS;
1643 
1644           PetscCall(MatFactorCreateSchurComplement(F,&tS,NULL));
1645           PetscCall(MatGetSize(tS,&nS,NULL));
1646           PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1647           for (i=0,cum=0;i<sub_schurs->n_subs;i++) { /* naive implementation */
1648             PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1649             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,SEjinv_arr,&SEj));
1650 
1651             PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&EE));
1652             PetscCall(ISComplement(EE,0,nS,&II));
1653             PetscCall(MatCreateSubMatrix(tS,II,II,MAT_INITIAL_MATRIX,&S_II));
1654             PetscCall(MatCreateSubMatrix(tS,II,EE,MAT_INITIAL_MATRIX,&S_IE));
1655             PetscCall(MatCreateSubMatrix(tS,EE,EE,MAT_INITIAL_MATRIX,&S_EE));
1656             PetscCall(ISDestroy(&II));
1657             PetscCall(ISDestroy(&EE));
1658 
1659             PetscCall(KSPCreate(PETSC_COMM_SELF,&pS_II));
1660             PetscCall(KSPSetType(pS_II,KSPPREONLY));
1661             PetscCall(KSPGetPC(pS_II,&pS_II_pc));
1662             PetscCall(PCSetType(pS_II_pc,PCSVD));
1663             PetscCall(KSPSetOptionsPrefix(pS_II,sub_schurs->prefix));
1664             PetscCall(KSPAppendOptionsPrefix(pS_II,"pseudo_"));
1665             PetscCall(KSPSetOperators(pS_II,S_II,S_II));
1666             PetscCall(MatDestroy(&S_II));
1667             PetscCall(KSPSetFromOptions(pS_II));
1668             PetscCall(KSPSetUp(pS_II));
1669             PetscCall(MatDuplicate(S_IE,MAT_DO_NOT_COPY_VALUES,&tX));
1670             PetscCall(KSPMatSolve(pS_II,S_IE,tX));
1671             PetscCall(KSPDestroy(&pS_II));
1672 
1673             PetscCall(MatTransposeMatMult(S_IE,tX,MAT_REUSE_MATRIX,PETSC_DEFAULT,&SEj));
1674             PetscCall(MatDestroy(&S_IE));
1675             PetscCall(MatDestroy(&tX));
1676             PetscCall(MatAYPX(SEj,-1,S_EE,SAME_NONZERO_PATTERN));
1677             PetscCall(MatDestroy(&S_EE));
1678 
1679             PetscCall(MatDestroy(&SEj));
1680             cum += subset_size;
1681             SEjinv_arr += subset_size*subset_size;
1682           }
1683           PetscCall(MatDestroy(&tS));
1684           PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1685         }
1686         /* S_Ej_tilda_all */
1687         cum = cum2 = 0;
1688         rS_data = NULL;
1689         if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv,&rS_data));
1690         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1691         for (i=0;i<sub_schurs->n_subs;i++) {
1692           PetscInt j;
1693 
1694           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1695           /* get (St^-1)_E */
1696           /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1697              will be properly accessed later during adaptive selection */
1698           if (rS_data) {
1699             if (S_lower_triangular) {
1700               PetscInt k;
1701               if (sub_schurs->change) {
1702                 for (k=0;k<subset_size;k++) {
1703                   for (j=k;j<subset_size;j++) {
1704                     work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1705                     work[j*subset_size+k] = work[k*subset_size+j];
1706                   }
1707                 }
1708               } else {
1709                 for (k=0;k<subset_size;k++) {
1710                   for (j=k;j<subset_size;j++) {
1711                     work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1712                   }
1713                 }
1714               }
1715             } else {
1716               PetscInt k;
1717               for (k=0;k<subset_size;k++) {
1718                 for (j=0;j<subset_size;j++) {
1719                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1720                 }
1721               }
1722             }
1723           }
1724           if (sub_schurs->change) {
1725             Mat change_sub,SEj,T;
1726             PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1./PETSC_SMALL;
1727 
1728             /* change basis */
1729             PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1730             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,rS_data ? work : SEjinv_arr,&SEj));
1731             if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1732               Mat T2;
1733               PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1734               PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1735               PetscCall(MatDestroy(&T2));
1736               PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
1737             } else {
1738               PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1739             }
1740             PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1741             PetscCall(MatDestroy(&T));
1742             PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],val,NULL,NULL));
1743             PetscCall(MatDestroy(&SEj));
1744           }
1745           if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr,work,subset_size*subset_size));
1746           cum += subset_size;
1747           cum2 += subset_size*(size_schur + 1);
1748           SEjinv_arr += subset_size*subset_size;
1749         }
1750         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1751         if (S_all_inv) {
1752           PetscCall(MatDenseRestoreArrayRead(S_all_inv,&rS_data));
1753           if (solver_S) {
1754             if (schur_has_vertices) {
1755               PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED));
1756             } else {
1757               PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED));
1758             }
1759           }
1760         }
1761         PetscCall(MatDestroy(&S_all_inv));
1762       }
1763 
1764       /* move back factors if needed */
1765       if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1766         Mat      S_tmp;
1767         PetscInt nd = 0;
1768 
1769         PetscCheck(solver_S,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
1770         PetscCall(MatFactorGetSchurComplement(F,&S_tmp,NULL));
1771         if (use_potr) {
1772           PetscScalar *data;
1773 
1774           PetscCall(MatDenseGetArray(S_tmp,&data));
1775           PetscCall(PetscArrayzero(data,size_schur*size_schur));
1776 
1777           if (S_lower_triangular) {
1778             cum = 0;
1779             for (i=0;i<size_active_schur;i++) {
1780               PetscCall(PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i));
1781               cum += size_active_schur-i;
1782             }
1783           } else {
1784             PetscCall(PetscArraycpy(data,schur_factor,size_schur*size_schur));
1785           }
1786           if (sub_schurs->is_dir) {
1787             PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1788             for (i=0;i<nd;i++) {
1789               data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1790             }
1791           }
1792           /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1793              set the diagonal entry of the Schur factor to a very large value */
1794           for (i=size_active_schur+nd;i<size_schur;i++) {
1795             data[i*(size_schur+1)] = infty;
1796           }
1797           PetscCall(MatDenseRestoreArray(S_tmp,&data));
1798         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1799         PetscCall(MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED));
1800       }
1801     } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1802       PetscScalar *data;
1803       PetscInt    nd = 0;
1804 
1805       if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1806         PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1807       }
1808       PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1809       PetscCall(MatDenseGetArray(S_all,&data));
1810       for (i=0;i<size_active_schur;i++) {
1811         PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
1812       }
1813       for (i=size_active_schur+nd;i<size_schur;i++) {
1814         PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
1815         data[i*(size_schur+1)] = infty;
1816       }
1817       PetscCall(MatDenseRestoreArray(S_all,&data));
1818       PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1819     }
1820     PetscCall(PetscFree(work));
1821     PetscCall(PetscFree(schur_factor));
1822     PetscCall(VecDestroy(&Dall));
1823   }
1824   PetscCall(ISDestroy(&is_I_layer));
1825   PetscCall(MatDestroy(&S_all));
1826   PetscCall(MatDestroy(&A_BB));
1827   PetscCall(MatDestroy(&A_IB));
1828   PetscCall(MatDestroy(&A_BI));
1829   PetscCall(MatDestroy(&F));
1830 
1831   PetscCall(PetscMalloc1(sub_schurs->n_subs,&nnz));
1832   for (i=0;i<sub_schurs->n_subs;i++) {
1833     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i]));
1834   }
1835   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer));
1836   PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz));
1837   PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
1838   PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
1839   if (compute_Stilda) {
1840     PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz));
1841     PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
1842     PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
1843     if (deluxe) {
1844       PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz));
1845       PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
1846       PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
1847     }
1848   }
1849   PetscCall(ISDestroy(&is_I_layer));
1850 
1851   /* Get local part of (\sum_j S_Ej) */
1852   if (!sub_schurs->sum_S_Ej_all) {
1853     PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all));
1854   }
1855   PetscCall(VecSet(gstash,0.0));
1856   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray));
1857   PetscCall(VecPlaceArray(lstash,stasharray));
1858   PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1859   PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1860   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray));
1861   PetscCall(VecResetArray(lstash));
1862   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray));
1863   PetscCall(VecPlaceArray(lstash,stasharray));
1864   PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1865   PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1866   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray));
1867   PetscCall(VecResetArray(lstash));
1868 
1869   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1870   if (compute_Stilda) {
1871     PetscCall(VecSet(gstash,0.0));
1872     PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1873     PetscCall(VecPlaceArray(lstash,stasharray));
1874     PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1875     PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1876     PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1877     PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1878     PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1879     PetscCall(VecResetArray(lstash));
1880     if (deluxe) {
1881       PetscCall(VecSet(gstash,0.0));
1882       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1883       PetscCall(VecPlaceArray(lstash,stasharray));
1884       PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1885       PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1886       PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1887       PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1888       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1889       PetscCall(VecResetArray(lstash));
1890     } else if (!sub_schurs->gdsw) {
1891       PetscScalar *array;
1892       PetscInt    cum;
1893 
1894       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array));
1895       cum = 0;
1896       for (i=0;i<sub_schurs->n_subs;i++) {
1897         PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1898         PetscCall(PetscBLASIntCast(subset_size,&B_N));
1899         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1900         if (use_potr) {
1901           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
1902           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1903           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
1904           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1905         } else if (use_sytr) {
1906           PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1907           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1908           PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr));
1909           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1910         } else {
1911           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
1912           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1913           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1914           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1915         }
1916         PetscCall(PetscLogFlops(1.0*subset_size*subset_size*subset_size));
1917         PetscCall(PetscFPTrapPop());
1918         cum += subset_size*subset_size;
1919       }
1920       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array));
1921       PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
1922       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1923       sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1924     }
1925   }
1926   PetscCall(VecDestroy(&lstash));
1927   PetscCall(VecDestroy(&gstash));
1928   PetscCall(VecScatterDestroy(&sstash));
1929 
1930   if (matl_dbg_viewer) {
1931     if (sub_schurs->S_Ej_all) {
1932       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE"));
1933       PetscCall(MatView(sub_schurs->S_Ej_all,matl_dbg_viewer));
1934     }
1935     if (sub_schurs->sum_S_Ej_all) {
1936       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE"));
1937       PetscCall(MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer));
1938     }
1939     if (sub_schurs->sum_S_Ej_inv_all) {
1940       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm"));
1941       PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer));
1942     }
1943     if (sub_schurs->sum_S_Ej_tilda_all) {
1944       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt"));
1945       PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer));
1946     }
1947   }
1948 
1949   /* free workspace */
1950   if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
1951   if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
1952   PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
1953   PetscCall(PetscFree2(Bwork,pivots));
1954   PetscCall(PetscCommDestroy(&comm_n));
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw)
1959 {
1960   IS              *faces,*edges,*all_cc,vertices;
1961   PetscInt        s,i,n_faces,n_edges,n_all_cc;
1962   PetscBool       is_sorted,ispardiso,ismumps;
1963 
1964   PetscFunctionBegin;
1965   PetscCall(ISSorted(is_I,&is_sorted));
1966   PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1967   PetscCall(ISSorted(is_B,&is_sorted));
1968   PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1969 
1970   /* reset any previous data */
1971   PetscCall(PCBDDCSubSchursReset(sub_schurs));
1972 
1973   sub_schurs->gdsw = gdsw;
1974 
1975   /* get index sets for faces and edges (already sorted by global ordering) */
1976   PetscCall(PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
1977   n_all_cc = n_faces+n_edges;
1978   PetscCall(PetscBTCreate(n_all_cc,&sub_schurs->is_edge));
1979   PetscCall(PetscMalloc1(n_all_cc,&all_cc));
1980   n_all_cc = 0;
1981   for (i=0;i<n_faces;i++) {
1982     PetscCall(ISGetSize(faces[i],&s));
1983     if (!s) continue;
1984     if (copycc) {
1985       PetscCall(ISDuplicate(faces[i],&all_cc[n_all_cc]));
1986     } else {
1987       PetscCall(PetscObjectReference((PetscObject)faces[i]));
1988       all_cc[n_all_cc] = faces[i];
1989     }
1990     n_all_cc++;
1991   }
1992   for (i=0;i<n_edges;i++) {
1993     PetscCall(ISGetSize(edges[i],&s));
1994     if (!s) continue;
1995     if (copycc) {
1996       PetscCall(ISDuplicate(edges[i],&all_cc[n_all_cc]));
1997     } else {
1998       PetscCall(PetscObjectReference((PetscObject)edges[i]));
1999       all_cc[n_all_cc] = edges[i];
2000     }
2001     PetscCall(PetscBTSet(sub_schurs->is_edge,n_all_cc));
2002     n_all_cc++;
2003   }
2004   PetscCall(PetscObjectReference((PetscObject)vertices));
2005   sub_schurs->is_vertices = vertices;
2006   PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
2007   sub_schurs->is_dir = NULL;
2008   PetscCall(PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir));
2009 
2010   /* Determine if MatFactor can be used */
2011   PetscCall(PetscStrallocpy(prefix,&sub_schurs->prefix));
2012 #if defined(PETSC_HAVE_MUMPS)
2013   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type)));
2014 #elif defined(PETSC_HAVE_MKL_PARDISO)
2015   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type)));
2016 #else
2017   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type)));
2018 #endif
2019 #if defined(PETSC_USE_COMPLEX)
2020   sub_schurs->is_hermitian  = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
2021 #else
2022   sub_schurs->is_hermitian  = PETSC_TRUE;
2023 #endif
2024   sub_schurs->is_posdef     = PETSC_TRUE;
2025   sub_schurs->is_symmetric  = PETSC_TRUE;
2026   sub_schurs->debug         = PETSC_FALSE;
2027   sub_schurs->restrict_comm = PETSC_FALSE;
2028   PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
2029   PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL));
2030   PetscCall(PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL));
2031   PetscCall(PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL));
2032   PetscCall(PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL));
2033   PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL));
2034   PetscCall(PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL));
2035   PetscOptionsEnd();
2036   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps));
2037   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso));
2038   sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
2039 
2040   /* for reals, symmetric and hermitian are synonims */
2041 #if !defined(PETSC_USE_COMPLEX)
2042   sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
2043   sub_schurs->is_hermitian = sub_schurs->is_symmetric;
2044 #endif
2045 
2046   PetscCall(PetscObjectReference((PetscObject)is_I));
2047   sub_schurs->is_I = is_I;
2048   PetscCall(PetscObjectReference((PetscObject)is_B));
2049   sub_schurs->is_B = is_B;
2050   PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
2051   sub_schurs->l2gmap = graph->l2gmap;
2052   PetscCall(PetscObjectReference((PetscObject)BtoNmap));
2053   sub_schurs->BtoNmap = BtoNmap;
2054   sub_schurs->n_subs = n_all_cc;
2055   sub_schurs->is_subs = all_cc;
2056   sub_schurs->S_Ej_all = NULL;
2057   sub_schurs->sum_S_Ej_all = NULL;
2058   sub_schurs->sum_S_Ej_inv_all = NULL;
2059   sub_schurs->sum_S_Ej_tilda_all = NULL;
2060   sub_schurs->is_Ej_all = NULL;
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
2065 {
2066   PCBDDCSubSchurs schurs_ctx;
2067 
2068   PetscFunctionBegin;
2069   PetscCall(PetscNew(&schurs_ctx));
2070   schurs_ctx->n_subs = 0;
2071   *sub_schurs = schurs_ctx;
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
2076 {
2077   PetscInt       i;
2078 
2079   PetscFunctionBegin;
2080   if (!sub_schurs) PetscFunctionReturn(0);
2081   PetscCall(PetscFree(sub_schurs->prefix));
2082   PetscCall(MatDestroy(&sub_schurs->A));
2083   PetscCall(MatDestroy(&sub_schurs->S));
2084   PetscCall(ISDestroy(&sub_schurs->is_I));
2085   PetscCall(ISDestroy(&sub_schurs->is_B));
2086   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
2087   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
2088   PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
2089   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
2090   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
2091   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
2092   PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
2093   PetscCall(ISDestroy(&sub_schurs->is_vertices));
2094   PetscCall(ISDestroy(&sub_schurs->is_dir));
2095   PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
2096   for (i=0;i<sub_schurs->n_subs;i++) {
2097     PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
2098   }
2099   if (sub_schurs->n_subs) {
2100     PetscCall(PetscFree(sub_schurs->is_subs));
2101   }
2102   if (sub_schurs->reuse_solver) {
2103     PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
2104   }
2105   PetscCall(PetscFree(sub_schurs->reuse_solver));
2106   if (sub_schurs->change) {
2107     for (i=0;i<sub_schurs->n_subs;i++) {
2108       PetscCall(KSPDestroy(&sub_schurs->change[i]));
2109       PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
2110     }
2111   }
2112   PetscCall(PetscFree(sub_schurs->change));
2113   PetscCall(PetscFree(sub_schurs->change_primal_sub));
2114   sub_schurs->n_subs = 0;
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs)
2119 {
2120   PetscFunctionBegin;
2121   PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2122   PetscCall(PetscFree(*sub_schurs));
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
2127 {
2128   PetscInt       i,j,n;
2129 
2130   PetscFunctionBegin;
2131   n = 0;
2132   for (i=-n_prev;i<0;i++) {
2133     PetscInt start_dof = queue_tip[i];
2134     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
2135       PetscInt dof = adjncy[j];
2136       if (!PetscBTLookup(touched,dof)) {
2137         PetscCall(PetscBTSet(touched,dof));
2138         queue_tip[n] = dof;
2139         n++;
2140       }
2141     }
2142   }
2143   *n_added = n;
2144   PetscFunctionReturn(0);
2145 }
2146