xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
134a97f8cSStefano Zampini #include <../src/ksp/pc/impls/bddc/bddc.h>
234a97f8cSStefano Zampini #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
305709791SSatish Balay #include <../src/mat/impls/dense/seq/dense.h>
408122e43SStefano Zampini #include <petscblaslapack.h>
534a97f8cSStefano Zampini 
69fbee547SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
75ec10c6aSStefano Zampini static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
8df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec);
9df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec);
10d62866d3SStefano Zampini 
11ca92afb2SStefano Zampini /* if v2 is not present, correction is done in-place */
125cbda25cSStefano Zampini PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13ca92afb2SStefano Zampini {
14ca92afb2SStefano Zampini   PetscScalar    *array;
15ca92afb2SStefano Zampini   PetscScalar    *array2;
16ca92afb2SStefano Zampini 
17ca92afb2SStefano Zampini   PetscFunctionBegin;
18ca92afb2SStefano Zampini   if (!ctx->benign_n) PetscFunctionReturn(0);
195cbda25cSStefano Zampini   if (sol && full) {
205cbda25cSStefano Zampini     PetscInt n_I,size_schur;
215cbda25cSStefano Zampini 
225cbda25cSStefano Zampini     /* get sizes */
23*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
24*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetSize(v,&n_I));
255cbda25cSStefano Zampini     n_I = n_I - size_schur;
265cbda25cSStefano Zampini     /* get schur sol from array */
27*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArray(v,&array));
28*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
29*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArray(v,&array));
305cbda25cSStefano Zampini     /* apply interior sol correction */
31*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work));
32*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecResetArray(ctx->benign_dummy_schur_vec));
33*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v));
345cbda25cSStefano Zampini   }
35ca92afb2SStefano Zampini   if (v2) {
36ca92afb2SStefano Zampini     PetscInt nl;
37ca92afb2SStefano Zampini 
38*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArrayRead(v,(const PetscScalar**)&array));
39*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetLocalSize(v2,&nl));
40*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArray(v2,&array2));
41*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscArraycpy(array2,array,nl));
42ca92afb2SStefano Zampini   } else {
43*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArray(v,&array));
44ca92afb2SStefano Zampini     array2 = array;
45ca92afb2SStefano Zampini   }
46ca92afb2SStefano Zampini   if (!sol) { /* change rhs */
47ca92afb2SStefano Zampini     PetscInt n;
48ca92afb2SStefano Zampini     for (n=0;n<ctx->benign_n;n++) {
49ca92afb2SStefano Zampini       PetscScalar    sum = 0.;
50ca92afb2SStefano Zampini       const PetscInt *cols;
51ca92afb2SStefano Zampini       PetscInt       nz,i;
52ca92afb2SStefano Zampini 
53*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
54*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
55ca92afb2SStefano Zampini       for (i=0;i<nz-1;i++) sum += array[cols[i]];
5622db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX)
5722db5ddcSStefano Zampini       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
5822db5ddcSStefano Zampini #else
59ca92afb2SStefano Zampini       sum = -sum/nz;
6022db5ddcSStefano Zampini #endif
61ca92afb2SStefano Zampini       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
62ca92afb2SStefano Zampini       ctx->benign_save_vals[n] = array2[cols[nz-1]];
63ca92afb2SStefano Zampini       array2[cols[nz-1]] = sum;
64*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
65ca92afb2SStefano Zampini     }
66ca92afb2SStefano Zampini   } else {
67ca92afb2SStefano Zampini     PetscInt n;
68ca92afb2SStefano Zampini     for (n=0;n<ctx->benign_n;n++) {
69ca92afb2SStefano Zampini       PetscScalar    sum = 0.;
70ca92afb2SStefano Zampini       const PetscInt *cols;
71ca92afb2SStefano Zampini       PetscInt       nz,i;
72*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
73*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
74ca92afb2SStefano Zampini       for (i=0;i<nz-1;i++) sum += array[cols[i]];
7522db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX)
7622db5ddcSStefano Zampini       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
7722db5ddcSStefano Zampini #else
78ca92afb2SStefano Zampini       sum = -sum/nz;
7922db5ddcSStefano Zampini #endif
80ca92afb2SStefano Zampini       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
81ca92afb2SStefano Zampini       array2[cols[nz-1]] = ctx->benign_save_vals[n];
82*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
83ca92afb2SStefano Zampini     }
84ca92afb2SStefano Zampini   }
85ca92afb2SStefano Zampini   if (v2) {
86*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArrayRead(v,(const PetscScalar**)&array));
87*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArray(v2,&array2));
88ca92afb2SStefano Zampini   } else {
89*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArray(v,&array));
90ca92afb2SStefano Zampini   }
915cbda25cSStefano Zampini   if (!sol && full) {
925cbda25cSStefano Zampini     Vec      usedv;
935cbda25cSStefano Zampini     PetscInt n_I,size_schur;
945cbda25cSStefano Zampini 
955cbda25cSStefano Zampini     /* get sizes */
96*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
97*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetSize(v,&n_I));
985cbda25cSStefano Zampini     n_I = n_I - size_schur;
995cbda25cSStefano Zampini     /* compute schur rhs correction */
1005cbda25cSStefano Zampini     if (v2) {
1015cbda25cSStefano Zampini       usedv = v2;
1025cbda25cSStefano Zampini     } else {
1035cbda25cSStefano Zampini       usedv = v;
1045cbda25cSStefano Zampini     }
1055cbda25cSStefano Zampini     /* apply schur rhs correction */
106*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work));
107*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArrayRead(usedv,(const PetscScalar**)&array));
108*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
109*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArrayRead(usedv,(const PetscScalar**)&array));
110*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec));
111*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecResetArray(ctx->benign_dummy_schur_vec));
1125cbda25cSStefano Zampini   }
113ca92afb2SStefano Zampini   PetscFunctionReturn(0);
114ca92afb2SStefano Zampini }
115ca92afb2SStefano Zampini 
116df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
117d62866d3SStefano Zampini {
118df4d28bfSStefano Zampini   PCBDDCReuseSolvers ctx;
119683d3df6SStefano Zampini   PetscBool          copy = PETSC_FALSE;
120d62866d3SStefano Zampini 
121d62866d3SStefano Zampini   PetscFunctionBegin;
122*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCShellGetContext(pc,&ctx));
123683d3df6SStefano Zampini   if (full) {
124d62866d3SStefano Zampini #if defined(PETSC_HAVE_MUMPS)
125*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMumpsSetIcntl(ctx->F,26,-1));
126d62866d3SStefano Zampini #endif
1275cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
128*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMkl_PardisoSetCntl(ctx->F,70,0));
1295cbda25cSStefano Zampini #endif
130683d3df6SStefano Zampini     copy = ctx->has_vertices;
131d4933d67SStefano Zampini   } else { /* interior solver */
1326dba178dSStefano Zampini #if defined(PETSC_HAVE_MUMPS)
133*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMumpsSetIcntl(ctx->F,26,0));
1346dba178dSStefano Zampini #endif
135d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
136*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatMkl_PardisoSetCntl(ctx->F,70,1));
137d4933d67SStefano Zampini #endif
138683d3df6SStefano Zampini     copy = PETSC_TRUE;
139683d3df6SStefano Zampini   }
140683d3df6SStefano Zampini   /* copy rhs into factored matrix workspace */
141683d3df6SStefano Zampini   if (copy) {
142ca92afb2SStefano Zampini     PetscInt    n;
143df4d28bfSStefano Zampini     PetscScalar *array,*array_solver;
144ca92afb2SStefano Zampini 
145*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetLocalSize(rhs,&n));
146*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArrayRead(rhs,(const PetscScalar**)&array));
147*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArray(ctx->rhs,&array_solver));
148*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscArraycpy(array_solver,array,n));
149*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArray(ctx->rhs,&array_solver));
150*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArrayRead(rhs,(const PetscScalar**)&array));
151683d3df6SStefano Zampini 
152*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full));
153683d3df6SStefano Zampini     if (transpose) {
154*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol));
155683d3df6SStefano Zampini     } else {
156*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSolve(ctx->F,ctx->rhs,ctx->sol));
157683d3df6SStefano Zampini     }
158*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full));
159683d3df6SStefano Zampini 
160683d3df6SStefano Zampini     /* get back data to caller worskpace */
161*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
162*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecGetArray(sol,&array));
163*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscArraycpy(array,array_solver,n));
164*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArray(sol,&array));
165*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
166683d3df6SStefano Zampini   } else {
167ca92afb2SStefano Zampini     if (ctx->benign_n) {
168*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full));
169ca92afb2SStefano Zampini       if (transpose) {
170*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSolveTranspose(ctx->F,ctx->rhs,sol));
171ca92afb2SStefano Zampini       } else {
172*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSolve(ctx->F,ctx->rhs,sol));
173ca92afb2SStefano Zampini       }
174*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full));
175ca92afb2SStefano Zampini     } else {
176e28d306cSStefano Zampini       if (transpose) {
177*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSolveTranspose(ctx->F,rhs,sol));
178e28d306cSStefano Zampini       } else {
179*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSolve(ctx->F,rhs,sol));
180e28d306cSStefano Zampini       }
181683d3df6SStefano Zampini     }
182ca92afb2SStefano Zampini   }
1835cbda25cSStefano Zampini   /* restore defaults */
1845cbda25cSStefano Zampini #if defined(PETSC_HAVE_MUMPS)
185*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMumpsSetIcntl(ctx->F,26,-1));
1865cbda25cSStefano Zampini #endif
187d4933d67SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
188*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatMkl_PardisoSetCntl(ctx->F,70,0));
189d4933d67SStefano Zampini #endif
190d62866d3SStefano Zampini   PetscFunctionReturn(0);
191d62866d3SStefano Zampini }
192d62866d3SStefano Zampini 
193df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
194e28d306cSStefano Zampini {
195e28d306cSStefano Zampini   PetscFunctionBegin;
196*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE));
197e28d306cSStefano Zampini   PetscFunctionReturn(0);
198e28d306cSStefano Zampini }
199e28d306cSStefano Zampini 
200df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
201e28d306cSStefano Zampini {
202e28d306cSStefano Zampini   PetscFunctionBegin;
203*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE));
204683d3df6SStefano Zampini   PetscFunctionReturn(0);
205683d3df6SStefano Zampini }
206683d3df6SStefano Zampini 
207df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
208683d3df6SStefano Zampini {
209683d3df6SStefano Zampini   PetscFunctionBegin;
210*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE));
211683d3df6SStefano Zampini   PetscFunctionReturn(0);
212683d3df6SStefano Zampini }
213683d3df6SStefano Zampini 
214df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
215683d3df6SStefano Zampini {
216683d3df6SStefano Zampini   PetscFunctionBegin;
217*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE));
218e28d306cSStefano Zampini   PetscFunctionReturn(0);
219e28d306cSStefano Zampini }
220e28d306cSStefano Zampini 
22115579a77SStefano Zampini static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
22215579a77SStefano Zampini {
22315579a77SStefano Zampini   PCBDDCReuseSolvers ctx;
22415579a77SStefano Zampini   PetscBool          iascii;
22515579a77SStefano Zampini 
22615579a77SStefano Zampini   PetscFunctionBegin;
227*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCShellGetContext(pc,&ctx));
228*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
22915579a77SStefano Zampini   if (iascii) {
230*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO));
23115579a77SStefano Zampini   }
232*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatView(ctx->F,viewer));
23315579a77SStefano Zampini   if (iascii) {
234*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscViewerPopFormat(viewer));
23515579a77SStefano Zampini   }
23615579a77SStefano Zampini   PetscFunctionReturn(0);
23715579a77SStefano Zampini }
23815579a77SStefano Zampini 
239df4d28bfSStefano Zampini static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
240d62866d3SStefano Zampini {
241ca92afb2SStefano Zampini   PetscInt       i;
242d62866d3SStefano Zampini 
243d62866d3SStefano Zampini   PetscFunctionBegin;
244*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&reuse->F));
245*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&reuse->sol));
246*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&reuse->rhs));
247*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCDestroy(&reuse->interior_solver));
248*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCDestroy(&reuse->correction_solver));
249*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&reuse->is_R));
250*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&reuse->is_B));
251*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecScatterDestroy(&reuse->correction_scatter_B));
252*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&reuse->sol_B));
253*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&reuse->rhs_B));
254ca92afb2SStefano Zampini   for (i=0;i<reuse->benign_n;i++) {
255*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&reuse->benign_zerodiag_subs[i]));
256ca92afb2SStefano Zampini   }
257*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(reuse->benign_zerodiag_subs));
258*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(reuse->benign_save_vals));
259*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&reuse->benign_csAIB));
260*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&reuse->benign_AIIm1ones));
261*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&reuse->benign_corr_work));
262*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&reuse->benign_dummy_schur_vec));
263d62866d3SStefano Zampini   PetscFunctionReturn(0);
264d62866d3SStefano Zampini }
265d5574798SStefano Zampini 
2665ec10c6aSStefano Zampini static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
2673202ece2SStefano Zampini {
2683202ece2SStefano Zampini   Mat            B, C, D, Bd, Cd, AinvBd;
2693202ece2SStefano Zampini   KSP            ksp;
2703202ece2SStefano Zampini   PC             pc;
2713202ece2SStefano Zampini   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
2723202ece2SStefano Zampini   PetscReal      fill = 2.0;
273f11841e3SStefano Zampini   PetscInt       n_I;
2743202ece2SStefano Zampini   PetscMPIInt    size;
2753202ece2SStefano Zampini 
2763202ece2SStefano Zampini   PetscFunctionBegin;
277*5f80ce2aSJacob Faibussowitsch   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&size));
2782c71b3e2SJacob Faibussowitsch   PetscCheckFalse(size != 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
279f11841e3SStefano Zampini   if (reuse == MAT_REUSE_MATRIX) {
280f11841e3SStefano Zampini     PetscBool Sdense;
281f11841e3SStefano Zampini 
282*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
2832c71b3e2SJacob Faibussowitsch     PetscCheckFalse(!Sdense,PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
284f11841e3SStefano Zampini   }
285*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
286*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSchurComplementGetKSP(M, &ksp));
287*5f80ce2aSJacob Faibussowitsch   CHKERRQ(KSPGetPC(ksp, &pc));
288*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU));
289*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU));
290*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL));
291*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense));
292*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense));
293*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatGetSize(B,&n_I,NULL));
294f11841e3SStefano Zampini   if (n_I) {
2953202ece2SStefano Zampini     if (!Bdense) {
296*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
2973202ece2SStefano Zampini     } else {
2983202ece2SStefano Zampini       Bd = B;
2993202ece2SStefano Zampini     }
3003202ece2SStefano Zampini 
3013202ece2SStefano Zampini     if (isLU || isILU || isCHOL) {
3023202ece2SStefano Zampini       Mat fact;
303*5f80ce2aSJacob Faibussowitsch       CHKERRQ(KSPSetUp(ksp));
304*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCFactorGetMatrix(pc, &fact));
305*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
306*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatMatSolve(fact, Bd, AinvBd));
3073202ece2SStefano Zampini     } else {
30807b1e237SStefano Zampini       PetscBool ex = PETSC_TRUE;
30907b1e237SStefano Zampini 
31007b1e237SStefano Zampini       if (ex) {
3113202ece2SStefano Zampini         Mat Ainvd;
3123202ece2SStefano Zampini 
313*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PCComputeOperator(pc, MATDENSE, &Ainvd));
314*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
315*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&Ainvd));
31607b1e237SStefano Zampini       } else {
31707b1e237SStefano Zampini         Vec         sol,rhs;
31807b1e237SStefano Zampini         PetscScalar *arrayrhs,*arraysol;
31907b1e237SStefano Zampini         PetscInt    i,nrhs,n;
32007b1e237SStefano Zampini 
321*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
322*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatGetSize(Bd,&n,&nrhs));
323*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseGetArray(Bd,&arrayrhs));
324*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseGetArray(AinvBd,&arraysol));
325*5f80ce2aSJacob Faibussowitsch         CHKERRQ(KSPGetSolution(ksp,&sol));
326*5f80ce2aSJacob Faibussowitsch         CHKERRQ(KSPGetRhs(ksp,&rhs));
32707b1e237SStefano Zampini         for (i=0;i<nrhs;i++) {
328*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecPlaceArray(rhs,arrayrhs+i*n));
329*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecPlaceArray(sol,arraysol+i*n));
330*5f80ce2aSJacob Faibussowitsch           CHKERRQ(KSPSolve(ksp,rhs,sol));
331*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecResetArray(rhs));
332*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecResetArray(sol));
33307b1e237SStefano Zampini         }
334*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseRestoreArray(Bd,&arrayrhs));
335*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseRestoreArray(AinvBd,&arrayrhs));
33607b1e237SStefano Zampini       }
3373202ece2SStefano Zampini     }
3385ec10c6aSStefano Zampini     if (!Bdense & !issym) {
339*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&Bd));
3403202ece2SStefano Zampini     }
3415ec10c6aSStefano Zampini 
3425ec10c6aSStefano Zampini     if (!issym) {
3433202ece2SStefano Zampini       if (!Cdense) {
344*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
3453202ece2SStefano Zampini       } else {
3463202ece2SStefano Zampini         Cd = C;
3473202ece2SStefano Zampini       }
348*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatMatMult(Cd, AinvBd, reuse, fill, S));
3493202ece2SStefano Zampini       if (!Cdense) {
350*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&Cd));
3513202ece2SStefano Zampini       }
3525ec10c6aSStefano Zampini     } else {
353*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
3545ec10c6aSStefano Zampini       if (!Bdense) {
355*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&Bd));
3565ec10c6aSStefano Zampini       }
3575ec10c6aSStefano Zampini     }
358*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDestroy(&AinvBd));
359f11841e3SStefano Zampini   }
3603202ece2SStefano Zampini 
3613202ece2SStefano Zampini   if (D) {
3623202ece2SStefano Zampini     Mat       Dd;
3633202ece2SStefano Zampini     PetscBool Ddense;
3643202ece2SStefano Zampini 
365*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense));
3663202ece2SStefano Zampini     if (!Ddense) {
367*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
3683202ece2SStefano Zampini     } else {
3693202ece2SStefano Zampini       Dd = D;
3703202ece2SStefano Zampini     }
371f11841e3SStefano Zampini     if (n_I) {
372*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN));
373f11841e3SStefano Zampini     } else {
374f11841e3SStefano Zampini       if (reuse == MAT_INITIAL_MATRIX) {
375*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDuplicate(Dd,MAT_COPY_VALUES,S));
376f11841e3SStefano Zampini       } else {
377*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCopy(Dd,*S,SAME_NONZERO_PATTERN));
378f11841e3SStefano Zampini       }
379f11841e3SStefano Zampini     }
3803202ece2SStefano Zampini     if (!Ddense) {
381*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&Dd));
3823202ece2SStefano Zampini     }
3833202ece2SStefano Zampini   } else {
384*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatScale(*S,-1.0));
3853202ece2SStefano Zampini   }
3863202ece2SStefano Zampini   PetscFunctionReturn(0);
3873202ece2SStefano Zampini }
38834a97f8cSStefano Zampini 
38991af6908SStefano Zampini PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
390b1b3d7a2SStefano Zampini {
391be83ff47SStefano Zampini   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
392be83ff47SStefano Zampini   Mat                    S_all;
39357a87bf3SStefano Zampini   Vec                    gstash,lstash;
39457a87bf3SStefano Zampini   VecScatter             sstash;
395b7ab4a40SStefano Zampini   IS                     is_I,is_I_layer;
396dc456d91SStefano Zampini   IS                     all_subsets,all_subsets_mult,all_subsets_n;
39757a87bf3SStefano Zampini   PetscScalar            *stasharray,*Bwork;
398dc456d91SStefano Zampini   PetscInt               *nnz,*all_local_idx_N;
399dc456d91SStefano Zampini   PetscInt               *auxnum1,*auxnum2;
4005a95e1ceSStefano Zampini   PetscInt               i,subset_size,max_subset_size;
401683d3df6SStefano Zampini   PetscInt               n_B,extra,local_size,global_size;
40257a87bf3SStefano Zampini   PetscInt               local_stash_size;
40308122e43SStefano Zampini   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
4045a95e1ceSStefano Zampini   MPI_Comm               comm_n;
405f4f7d9d6SStefano Zampini   PetscBool              deluxe = PETSC_TRUE;
406f4f7d9d6SStefano Zampini   PetscBool              use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
4073b03f7bbSStefano Zampini   PetscViewer            matl_dbg_viewer = NULL;
408b1b3d7a2SStefano Zampini   PetscErrorCode         ierr;
40935d0533cSStefano Zampini   PetscBool              flg;
410b1b3d7a2SStefano Zampini 
411b1b3d7a2SStefano Zampini   PetscFunctionBegin;
412*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&sub_schurs->A));
413*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&sub_schurs->S));
414e62b6521Sstefano_zampini   if (Ain) {
415*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectReference((PetscObject)Ain));
416a64f4aa4SStefano Zampini     sub_schurs->A = Ain;
417a64f4aa4SStefano Zampini   }
4183301b35fSStefano Zampini 
419*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectReference((PetscObject)Sin));
420a64f4aa4SStefano Zampini   sub_schurs->S = Sin;
421df4d28bfSStefano Zampini   if (sub_schurs->schur_explicit) {
422df4d28bfSStefano Zampini     sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
423a64f4aa4SStefano Zampini   }
424a64f4aa4SStefano Zampini 
4255a95e1ceSStefano Zampini   /* preliminary checks */
4262c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!sub_schurs->schur_explicit && compute_Stilda,PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
4275a95e1ceSStefano Zampini 
42888113c35SStefano Zampini   if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
42988113c35SStefano Zampini 
4303b03f7bbSStefano Zampini   /* debug (MATLAB) */
4317f9db97bSStefano Zampini   if (sub_schurs->debug) {
4327f9db97bSStefano Zampini     PetscMPIInt size,rank;
4337ebab0bbSStefano Zampini     PetscInt    nr,*print_schurs_ranks,print_schurs = PETSC_FALSE;
4347f9db97bSStefano Zampini 
435*5f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size));
436*5f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
4377f9db97bSStefano Zampini     nr   = size;
438*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc1(nr,&print_schurs_ranks));
4397f9db97bSStefano Zampini     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr);
440*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg));
4417f9db97bSStefano Zampini     if (!flg) print_schurs = PETSC_TRUE;
4427f9db97bSStefano Zampini     else {
4437ebab0bbSStefano Zampini       print_schurs = PETSC_FALSE;
4447f9db97bSStefano Zampini       for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; }
4457f9db97bSStefano Zampini     }
4467f9db97bSStefano Zampini     ierr = PetscOptionsEnd();CHKERRQ(ierr);
447*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(print_schurs_ranks));
4483b03f7bbSStefano Zampini     if (print_schurs) {
4493b03f7bbSStefano Zampini       char filename[256];
4503b03f7bbSStefano Zampini 
451*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank));
452*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer));
453*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB));
4543b03f7bbSStefano Zampini     }
4557f9db97bSStefano Zampini   }
4567f9db97bSStefano Zampini 
4575a95e1ceSStefano Zampini   /* restrict work on active processes */
458991c41b4SStefano Zampini   if (sub_schurs->restrict_comm) {
459991c41b4SStefano Zampini     PetscSubcomm subcomm;
460991c41b4SStefano Zampini     PetscMPIInt  color,rank;
461991c41b4SStefano Zampini 
4625a95e1ceSStefano Zampini     color = 0;
4635a95e1ceSStefano Zampini     if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
464*5f80ce2aSJacob Faibussowitsch     CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
465*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm));
466*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSubcommSetNumber(subcomm,2));
467*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSubcommSetTypeGeneral(subcomm,color,rank));
468*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL));
469*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscSubcommDestroy(&subcomm));
4705a95e1ceSStefano Zampini     if (!sub_schurs->n_subs) {
471*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscCommDestroy(&comm_n));
4725a95e1ceSStefano Zampini       PetscFunctionReturn(0);
4735a95e1ceSStefano Zampini     }
474991c41b4SStefano Zampini   } else {
475*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL));
476991c41b4SStefano Zampini   }
4775a95e1ceSStefano Zampini 
478b1b3d7a2SStefano Zampini   /* get Schur complement matrices */
479df4d28bfSStefano Zampini   if (!sub_schurs->schur_explicit) {
480a64f4aa4SStefano Zampini     Mat       tA_IB,tA_BI,tA_BB;
4813301b35fSStefano Zampini     PetscBool isseqsbaij;
482*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB));
483*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij));
4843301b35fSStefano Zampini     if (isseqsbaij) {
485*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB));
486*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB));
487*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI));
488a64f4aa4SStefano Zampini     } else {
489*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectReference((PetscObject)tA_BB));
490a64f4aa4SStefano Zampini       A_BB = tA_BB;
491*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectReference((PetscObject)tA_IB));
492a64f4aa4SStefano Zampini       A_IB = tA_IB;
493*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectReference((PetscObject)tA_BI));
494a64f4aa4SStefano Zampini       A_BI = tA_BI;
495f11841e3SStefano Zampini     }
496a58a30b4SStefano Zampini   } else {
4975a95e1ceSStefano Zampini     A_II = NULL;
4985a95e1ceSStefano Zampini     A_IB = NULL;
4995a95e1ceSStefano Zampini     A_BI = NULL;
5005a95e1ceSStefano Zampini     A_BB = NULL;
501b1b3d7a2SStefano Zampini   }
5025a95e1ceSStefano Zampini   S_all = NULL;
503b1b3d7a2SStefano Zampini 
504b1b3d7a2SStefano Zampini   /* determine interior problems */
505*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISGetLocalSize(sub_schurs->is_I,&i));
5063dc780c3SStefano Zampini   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
507b1b3d7a2SStefano Zampini     PetscBT                touched;
508b1b3d7a2SStefano Zampini     const PetscInt*        idx_B;
509b1b3d7a2SStefano Zampini     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
510b1b3d7a2SStefano Zampini 
5112c71b3e2SJacob Faibussowitsch     PetscCheckFalse(!xadj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
512b1b3d7a2SStefano Zampini     /* get sizes */
513*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetLocalSize(sub_schurs->is_I,&n_I));
514*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetLocalSize(sub_schurs->is_B,&n_B));
515b1b3d7a2SStefano Zampini 
516*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc1(n_I+n_B,&local_numbering));
517*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBTCreate(n_I+n_B,&touched));
518*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBTMemzero(n_I+n_B,touched));
519b1b3d7a2SStefano Zampini 
520b1b3d7a2SStefano Zampini     /* all boundary dofs must be skipped when adding layers */
521*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetIndices(sub_schurs->is_B,&idx_B));
522b1b3d7a2SStefano Zampini     for (j=0;j<n_B;j++) {
523*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscBTSet(touched,idx_B[j]));
524b1b3d7a2SStefano Zampini     }
525*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscArraycpy(local_numbering,idx_B,n_B));
526*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISRestoreIndices(sub_schurs->is_B,&idx_B));
527b1b3d7a2SStefano Zampini 
528b1b3d7a2SStefano Zampini     /* add prescribed number of layers of dofs */
529b1b3d7a2SStefano Zampini     n_local_dofs = n_B;
530b1b3d7a2SStefano Zampini     n_prev_added = n_B;
531b1b3d7a2SStefano Zampini     for (layer=0;layer<nlayers;layer++) {
532b6bace71SJacob Faibussowitsch       PetscInt n_added = 0;
533b1b3d7a2SStefano Zampini       if (n_local_dofs == n_I+n_B) break;
5342c71b3e2SJacob Faibussowitsch       PetscCheckFalse(n_local_dofs > n_I+n_B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B);
535*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added));
536b1b3d7a2SStefano Zampini       n_prev_added = n_added;
537b1b3d7a2SStefano Zampini       n_local_dofs += n_added;
538b1b3d7a2SStefano Zampini       if (!n_added) break;
539b1b3d7a2SStefano Zampini     }
540*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBTDestroy(&touched));
541b1b3d7a2SStefano Zampini 
542883469d8SStefano Zampini     /* IS for I layer dofs in original numbering */
543*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer));
544*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(local_numbering));
545*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISSort(is_I_layer));
546883469d8SStefano Zampini     /* IS for I layer dofs in I numbering */
547df4d28bfSStefano Zampini     if (!sub_schurs->schur_explicit) {
548b1b3d7a2SStefano Zampini       ISLocalToGlobalMapping ItoNmap;
549*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap));
550*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I));
551*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISLocalToGlobalMappingDestroy(&ItoNmap));
552b1b3d7a2SStefano Zampini 
553b1b3d7a2SStefano Zampini       /* II block */
554*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II));
555b1b3d7a2SStefano Zampini     }
556b1b3d7a2SStefano Zampini   } else {
557b1b3d7a2SStefano Zampini     PetscInt n_I;
558b1b3d7a2SStefano Zampini 
559b1b3d7a2SStefano Zampini     /* IS for I dofs in original numbering */
560*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscObjectReference((PetscObject)sub_schurs->is_I));
561a9b99552SStefano Zampini     is_I_layer = sub_schurs->is_I;
562b1b3d7a2SStefano Zampini 
563b1b3d7a2SStefano Zampini     /* IS for I dofs in I numbering (strided 1) */
564df4d28bfSStefano Zampini     if (!sub_schurs->schur_explicit) {
565*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetSize(sub_schurs->is_I,&n_I));
566*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I));
567b1b3d7a2SStefano Zampini 
568b1b3d7a2SStefano Zampini       /* II block is the same */
569*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectReference((PetscObject)A_II));
570b1b3d7a2SStefano Zampini       AE_II = A_II;
571b1b3d7a2SStefano Zampini     }
572b1b3d7a2SStefano Zampini   }
5735a95e1ceSStefano Zampini 
574883469d8SStefano Zampini   /* Get info on subset sizes and sum of all subsets sizes */
5755a95e1ceSStefano Zampini   max_subset_size = 0;
576883469d8SStefano Zampini   local_size = 0;
5775a95e1ceSStefano Zampini   for (i=0;i<sub_schurs->n_subs;i++) {
578*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
5795a95e1ceSStefano Zampini     max_subset_size = PetscMax(subset_size,max_subset_size);
580883469d8SStefano Zampini     local_size += subset_size;
581883469d8SStefano Zampini   }
582883469d8SStefano Zampini 
583883469d8SStefano Zampini   /* Work arrays for local indices */
584883469d8SStefano Zampini   extra = 0;
585*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISGetLocalSize(sub_schurs->is_B,&n_B));
586df4d28bfSStefano Zampini   if (sub_schurs->schur_explicit && is_I_layer) {
587*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetLocalSize(is_I_layer,&extra));
588883469d8SStefano Zampini   }
589*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(n_B+extra,&all_local_idx_N));
590883469d8SStefano Zampini   if (extra) {
591883469d8SStefano Zampini     const PetscInt *idxs;
592*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetIndices(is_I_layer,&idxs));
593*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscArraycpy(all_local_idx_N,idxs,extra));
594*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISRestoreIndices(is_I_layer,&idxs));
595883469d8SStefano Zampini   }
596*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&auxnum1));
597*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&auxnum2));
598883469d8SStefano Zampini 
599883469d8SStefano Zampini   /* Get local indices in local numbering */
600883469d8SStefano Zampini   local_size = 0;
60157a87bf3SStefano Zampini   local_stash_size = 0;
6025a95e1ceSStefano Zampini   for (i=0;i<sub_schurs->n_subs;i++) {
603883469d8SStefano Zampini     const PetscInt *idxs;
604883469d8SStefano Zampini 
605*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
606*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetIndices(sub_schurs->is_subs[i],&idxs));
607eb595f79SStefano Zampini     /* start (smallest in global ordering) and multiplicity */
608eb595f79SStefano Zampini     auxnum1[i] = idxs[0];
60957a87bf3SStefano Zampini     auxnum2[i] = subset_size*subset_size;
610883469d8SStefano Zampini     /* subset indices in local numbering */
611*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size));
612*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISRestoreIndices(sub_schurs->is_subs[i],&idxs));
613883469d8SStefano Zampini     local_size += subset_size;
61457a87bf3SStefano Zampini     local_stash_size += subset_size*subset_size;
615883469d8SStefano Zampini   }
616883469d8SStefano Zampini 
617f4f7d9d6SStefano Zampini   /* allocate extra workspace needed only for GETRI or SYTRF */
61811955456SStefano Zampini   use_potr = use_sytr = PETSC_FALSE;
61911955456SStefano Zampini   if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
620f4f7d9d6SStefano Zampini     use_potr = PETSC_TRUE;
62111955456SStefano Zampini   } else if (sub_schurs->is_symmetric) {
62211955456SStefano Zampini     use_sytr = PETSC_TRUE;
62311955456SStefano Zampini   }
62411955456SStefano Zampini   if (local_size && !use_potr) {
62559ac4de7SStefano Zampini     PetscScalar  lwork,dummyscalar = 0.;
62659ac4de7SStefano Zampini     PetscBLASInt dummyint = 0;
627d2627357SStefano Zampini 
628d2627357SStefano Zampini     B_lwork = -1;
629*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast(local_size,&B_N));
630*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
631f4f7d9d6SStefano Zampini     if (use_sytr) {
632f4f7d9d6SStefano Zampini       PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
6332c71b3e2SJacob Faibussowitsch       PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr);
634f4f7d9d6SStefano Zampini     } else {
63559ac4de7SStefano Zampini       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
6362c71b3e2SJacob Faibussowitsch       PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
637f4f7d9d6SStefano Zampini     }
638*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFPTrapPop());
639*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork));
640*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc2(B_lwork,&Bwork,B_N,&pivots));
641056290a2SStefano Zampini   } else {
642056290a2SStefano Zampini     Bwork = NULL;
643056290a2SStefano Zampini     pivots = NULL;
644d2627357SStefano Zampini   }
645d2627357SStefano Zampini 
64657a87bf3SStefano Zampini   /* prepare data for summing up properly schurs on subsets */
647*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n));
648*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets));
649*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&all_subsets_n));
650*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult));
651*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n));
652*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&all_subsets));
653*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&all_subsets_mult));
654*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISGetLocalSize(all_subsets_n,&i));
6552c71b3e2SJacob Faibussowitsch   PetscCheckFalse(i != local_stash_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_stash_size);
656*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash));
657*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash));
658*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash));
659*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&all_subsets_n));
6602972d61bSStefano Zampini 
6615a95e1ceSStefano Zampini   /* subset indices in local boundary numbering */
6625a95e1ceSStefano Zampini   if (!sub_schurs->is_Ej_all) {
6635a95e1ceSStefano Zampini     PetscInt *all_local_idx_B;
6645a95e1ceSStefano Zampini 
665*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc1(local_size,&all_local_idx_B));
666*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B));
6672c71b3e2SJacob Faibussowitsch     PetscCheckFalse(subset_size != local_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D",subset_size,local_size);
668*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all));
669b1b3d7a2SStefano Zampini   }
670b1b3d7a2SStefano Zampini 
67172b8c272SStefano Zampini   if (change) {
67272b8c272SStefano Zampini     ISLocalToGlobalMapping BtoS;
67372b8c272SStefano Zampini     IS                     change_primal_B;
67472b8c272SStefano Zampini     IS                     change_primal_all;
67572b8c272SStefano Zampini 
6762c71b3e2SJacob Faibussowitsch     PetscCheckFalse(sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
6772c71b3e2SJacob Faibussowitsch     PetscCheckFalse(sub_schurs->change,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
678*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub));
67972b8c272SStefano Zampini     for (i=0;i<sub_schurs->n_subs;i++) {
68072b8c272SStefano Zampini       ISLocalToGlobalMapping NtoS;
681*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS));
682*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i]));
683*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISLocalToGlobalMappingDestroy(&NtoS));
68472b8c272SStefano Zampini     }
685*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B));
686*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS));
687*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all));
688*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISLocalToGlobalMappingDestroy(&BtoS));
689*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&change_primal_B));
690*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change));
69172b8c272SStefano Zampini     for (i=0;i<sub_schurs->n_subs;i++) {
69272b8c272SStefano Zampini       Mat change_sub;
69372b8c272SStefano Zampini 
694*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
695*5f80ce2aSJacob Faibussowitsch       CHKERRQ(KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i]));
696*5f80ce2aSJacob Faibussowitsch       CHKERRQ(KSPSetType(sub_schurs->change[i],KSPPREONLY));
69772b8c272SStefano Zampini       if (!sub_schurs->change_with_qr) {
698*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub));
69972b8c272SStefano Zampini       } else {
70072b8c272SStefano Zampini         Mat change_subt;
701*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt));
702*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub));
703*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&change_subt));
70472b8c272SStefano Zampini       }
705*5f80ce2aSJacob Faibussowitsch       CHKERRQ(KSPSetOperators(sub_schurs->change[i],change_sub,change_sub));
706*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&change_sub));
707*5f80ce2aSJacob Faibussowitsch       CHKERRQ(KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix));
708*5f80ce2aSJacob Faibussowitsch       CHKERRQ(KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_"));
70972b8c272SStefano Zampini     }
710*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&change_primal_all));
71172b8c272SStefano Zampini   }
71272b8c272SStefano Zampini 
7135a95e1ceSStefano Zampini   /* Local matrix of all local Schur on subsets (transposed) */
7145a95e1ceSStefano Zampini   if (!sub_schurs->S_Ej_all) {
71504c5b2e6SStefano Zampini     Mat         T;
71604c5b2e6SStefano Zampini     PetscScalar *v;
71704c5b2e6SStefano Zampini     PetscInt    *ii,*jj;
71804c5b2e6SStefano Zampini     PetscInt    cum,i,j,k;
71904c5b2e6SStefano Zampini 
72004c5b2e6SStefano Zampini     /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
72104c5b2e6SStefano Zampini        Allocate properly a representative matrix and duplicate */
722*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v));
72304c5b2e6SStefano Zampini     ii[0] = 0;
72404c5b2e6SStefano Zampini     cum   = 0;
72504c5b2e6SStefano Zampini     for (i=0;i<sub_schurs->n_subs;i++) {
726*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
72704c5b2e6SStefano Zampini       for (j=0;j<subset_size;j++) {
72804c5b2e6SStefano Zampini         const PetscInt row = cum+j;
72904c5b2e6SStefano Zampini         PetscInt col = cum;
73004c5b2e6SStefano Zampini 
73104c5b2e6SStefano Zampini         ii[row+1] = ii[row] + subset_size;
73204c5b2e6SStefano Zampini         for (k=ii[row];k<ii[row+1];k++) {
73304c5b2e6SStefano Zampini           jj[k] = col;
73404c5b2e6SStefano Zampini           col++;
73504c5b2e6SStefano Zampini         }
73604c5b2e6SStefano Zampini       }
73704c5b2e6SStefano Zampini       cum += subset_size;
73804c5b2e6SStefano Zampini     }
739*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T));
740*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all));
741*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDestroy(&T));
742*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree3(ii,jj,v));
74304c5b2e6SStefano Zampini   }
74404c5b2e6SStefano Zampini   /* matrices for deluxe scaling and adaptive selection */
74504c5b2e6SStefano Zampini   if (compute_Stilda) {
74604c5b2e6SStefano Zampini     if (!sub_schurs->sum_S_Ej_tilda_all) {
747*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all));
74804c5b2e6SStefano Zampini     }
74904c5b2e6SStefano Zampini     if (!sub_schurs->sum_S_Ej_inv_all && deluxe) {
750*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all));
75104c5b2e6SStefano Zampini     }
752aa83b6aeSStefano Zampini   }
753b1b3d7a2SStefano Zampini 
7545a95e1ceSStefano Zampini   /* Compute Schur complements explicitly */
755be83ff47SStefano Zampini   F = NULL;
756d943a642SStefano Zampini   if (!sub_schurs->schur_explicit) {
757d943a642SStefano Zampini     /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
758d943a642SStefano Zampini        it is not efficient, unless the economic version of the scaling is used */
7595a95e1ceSStefano Zampini     Mat         S_Ej_expl;
7605a95e1ceSStefano Zampini     PetscScalar *work;
7615a95e1ceSStefano Zampini     PetscInt    j,*dummy_idx;
7625a95e1ceSStefano Zampini     PetscBool   Sdense;
7635a95e1ceSStefano Zampini 
764*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work));
7655a95e1ceSStefano Zampini     local_size = 0;
766b1b3d7a2SStefano Zampini     for (i=0;i<sub_schurs->n_subs;i++) {
7675a95e1ceSStefano Zampini       IS  is_subset_B;
7685a95e1ceSStefano Zampini       Mat AE_EE,AE_IE,AE_EI,S_Ej;
7695a95e1ceSStefano Zampini 
7705a95e1ceSStefano Zampini       /* subsets in original and boundary numbering */
771*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B));
7725a95e1ceSStefano Zampini       /* EE block */
773*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE));
7745a95e1ceSStefano Zampini       /* IE block */
775*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE));
7765a95e1ceSStefano Zampini       /* EI block */
777d943a642SStefano Zampini       if (sub_schurs->is_symmetric) {
778*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCreateTranspose(AE_IE,&AE_EI));
779d943a642SStefano Zampini       } else if (sub_schurs->is_hermitian) {
780*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCreateHermitianTranspose(AE_IE,&AE_EI));
7815a95e1ceSStefano Zampini       } else {
782*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI));
7835a95e1ceSStefano Zampini       }
784*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&is_subset_B));
785*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej));
786*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&AE_EE));
787*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&AE_IE));
788*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&AE_EI));
789b1b3d7a2SStefano Zampini       if (AE_II == A_II) { /* we can reuse the same ksp */
790b1b3d7a2SStefano Zampini         KSP ksp;
791*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSchurComplementGetKSP(sub_schurs->S,&ksp));
792*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSchurComplementSetKSP(S_Ej,ksp));
793b1b3d7a2SStefano Zampini       } else { /* build new ksp object which inherits ksp and pc types from the original one */
794b1b3d7a2SStefano Zampini         KSP       origksp,schurksp;
795b1b3d7a2SStefano Zampini         PC        origpc,schurpc;
796b1b3d7a2SStefano Zampini         KSPType   ksp_type;
797b1b3d7a2SStefano Zampini         PetscInt  n_internal;
7985a95e1ceSStefano Zampini         PetscBool ispcnone;
799b1b3d7a2SStefano Zampini 
800*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSchurComplementGetKSP(sub_schurs->S,&origksp));
801*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSchurComplementGetKSP(S_Ej,&schurksp));
802*5f80ce2aSJacob Faibussowitsch         CHKERRQ(KSPGetType(origksp,&ksp_type));
803*5f80ce2aSJacob Faibussowitsch         CHKERRQ(KSPSetType(schurksp,ksp_type));
804*5f80ce2aSJacob Faibussowitsch         CHKERRQ(KSPGetPC(schurksp,&schurpc));
805*5f80ce2aSJacob Faibussowitsch         CHKERRQ(KSPGetPC(origksp,&origpc));
806*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone));
8075a95e1ceSStefano Zampini         if (!ispcnone) {
8085a95e1ceSStefano Zampini           PCType pc_type;
809*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PCGetType(origpc,&pc_type));
810*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PCSetType(schurpc,pc_type));
8115a95e1ceSStefano Zampini         } else {
812*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PCSetType(schurpc,PCLU));
8135a95e1ceSStefano Zampini         }
814*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGetSize(is_I,&n_internal));
815365a3a41SStefano Zampini         if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
8163ca39a21SBarry Smith           MatSolverType solver = NULL;
817*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver));
818b1b3d7a2SStefano Zampini           if (solver) {
819*5f80ce2aSJacob Faibussowitsch             CHKERRQ(PCFactorSetMatSolverType(schurpc,solver));
820b1b3d7a2SStefano Zampini           }
821b1b3d7a2SStefano Zampini         }
822*5f80ce2aSJacob Faibussowitsch         CHKERRQ(KSPSetUp(schurksp));
823b1b3d7a2SStefano Zampini       }
824*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
825*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl));
826*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl));
827*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense));
8285a95e1ceSStefano Zampini       if (Sdense) {
8295a95e1ceSStefano Zampini         for (j=0;j<subset_size;j++) {
8305a95e1ceSStefano Zampini           dummy_idx[j]=local_size+j;
831b1b3d7a2SStefano Zampini         }
832*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES));
8336c4ed002SBarry Smith       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
834*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&S_Ej));
835*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&S_Ej_expl));
8365a95e1ceSStefano Zampini       local_size += subset_size;
8375a95e1ceSStefano Zampini     }
838*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree2(dummy_idx,work));
839b1b3d7a2SStefano Zampini     /* free */
840*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&is_I));
841*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDestroy(&AE_II));
842*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(all_local_idx_N));
843883469d8SStefano Zampini   } else {
8445cbda25cSStefano Zampini     Mat               A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL;
8459d54b7f4SStefano Zampini     Vec               Dall = NULL;
846ca92afb2SStefano Zampini     IS                is_A_all,*is_p_r = NULL;
8477ebab0bbSStefano Zampini     MatType           Stype;
8485cbda25cSStefano Zampini     PetscScalar       *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL;
84904c5b2e6SStefano Zampini     PetscScalar       *SEj_arr = NULL,*SEjinv_arr = NULL;
8501683a169SBarry Smith     const PetscScalar *rS_data;
85104c5b2e6SStefano Zampini     PetscInt          n,n_I,size_schur,size_active_schur,cum,cum2;
8523fc34f97SStefano Zampini     PetscBool         economic,solver_S,S_lower_triangular = PETSC_FALSE;
8533fc34f97SStefano Zampini     PetscBool         schur_has_vertices,factor_workaround;
85411955456SStefano Zampini     PetscBool         use_cholesky;
8557ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
8567ebab0bbSStefano Zampini     PetscBool         oldpin;
8577ebab0bbSStefano Zampini #endif
858883469d8SStefano Zampini 
859683d3df6SStefano Zampini     /* get sizes */
86081ea8064SStefano Zampini     n_I = 0;
86181ea8064SStefano Zampini     if (is_I_layer) {
862*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(is_I_layer,&n_I));
86381ea8064SStefano Zampini     }
864683d3df6SStefano Zampini     economic = PETSC_FALSE;
865*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetLocalSize(sub_schurs->is_I,&cum));
866683d3df6SStefano Zampini     if (cum != n_I) economic = PETSC_TRUE;
867*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatGetLocalSize(sub_schurs->A,&n,NULL));
8689d54b7f4SStefano Zampini     size_active_schur = local_size;
8699d54b7f4SStefano Zampini 
870f17d2ae1SStefano Zampini     /* import scaling vector (wrong formulation if we have 3D edges) */
8719d54b7f4SStefano Zampini     if (scaling && compute_Stilda) {
8729d54b7f4SStefano Zampini       const PetscScalar *array;
8739d54b7f4SStefano Zampini       PetscScalar       *array2;
8749d54b7f4SStefano Zampini       const PetscInt    *idxs;
8759d54b7f4SStefano Zampini       PetscInt          i;
8769d54b7f4SStefano Zampini 
877*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetIndices(sub_schurs->is_Ej_all,&idxs));
878*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall));
879*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetArrayRead(scaling,&array));
880*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetArray(Dall,&array2));
8819d54b7f4SStefano Zampini       for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]];
882*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecRestoreArray(Dall,&array2));
883*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecRestoreArrayRead(scaling,&array));
884*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs));
8859d54b7f4SStefano Zampini       deluxe = PETSC_FALSE;
8869d54b7f4SStefano Zampini     }
887d62866d3SStefano Zampini 
888683d3df6SStefano Zampini     /* size active schurs does not count any dirichlet or vertex dof on the interface */
8893fc34f97SStefano Zampini     factor_workaround = PETSC_FALSE;
8903fc34f97SStefano Zampini     schur_has_vertices = PETSC_FALSE;
891683d3df6SStefano Zampini     cum = n_I+size_active_schur;
892683d3df6SStefano Zampini     if (sub_schurs->is_dir) {
893683d3df6SStefano Zampini       const PetscInt* idxs;
894683d3df6SStefano Zampini       PetscInt        n_dir;
895683d3df6SStefano Zampini 
896*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&n_dir));
897*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetIndices(sub_schurs->is_dir,&idxs));
898*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscArraycpy(all_local_idx_N+cum,idxs,n_dir));
899*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISRestoreIndices(sub_schurs->is_dir,&idxs));
900683d3df6SStefano Zampini       cum += n_dir;
9013fc34f97SStefano Zampini       factor_workaround = PETSC_TRUE;
902d62866d3SStefano Zampini     }
903683d3df6SStefano Zampini     /* include the primal vertices in the Schur complement */
904367aa537SStefano Zampini     if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
905683d3df6SStefano Zampini       PetscInt n_v;
906683d3df6SStefano Zampini 
907*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
908683d3df6SStefano Zampini       if (n_v) {
909683d3df6SStefano Zampini         const PetscInt* idxs;
910683d3df6SStefano Zampini 
911*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGetIndices(sub_schurs->is_vertices,&idxs));
912*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscArraycpy(all_local_idx_N+cum,idxs,n_v));
913*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISRestoreIndices(sub_schurs->is_vertices,&idxs));
914683d3df6SStefano Zampini         cum += n_v;
915683d3df6SStefano Zampini         factor_workaround = PETSC_TRUE;
9163fc34f97SStefano Zampini         schur_has_vertices = PETSC_TRUE;
917683d3df6SStefano Zampini       }
918683d3df6SStefano Zampini     }
919683d3df6SStefano Zampini     size_schur = cum - n_I;
920*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all));
9217ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
922b470e4b4SRichard Tran Mills     oldpin = sub_schurs->A->boundtocpu;
923*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatBindToCPU(sub_schurs->A,PETSC_TRUE));
9247ebab0bbSStefano Zampini #endif
925683d3df6SStefano Zampini     if (cum == n) {
926*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISSetPermutation(is_A_all));
927*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatPermute(sub_schurs->A,is_A_all,is_A_all,&A));
928683d3df6SStefano Zampini     } else {
929*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A));
930683d3df6SStefano Zampini     }
9317ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
932*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatBindToCPU(sub_schurs->A,oldpin));
9337ebab0bbSStefano Zampini #endif
934*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetOptionsPrefix(A,sub_schurs->prefix));
935*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAppendOptionsPrefix(A,"sub_schurs_"));
936ca92afb2SStefano Zampini 
937ca92afb2SStefano Zampini     /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
9387ebab0bbSStefano Zampini        this is a workaround */
939ca92afb2SStefano Zampini     if (benign_n) {
9407ebab0bbSStefano Zampini       Vec                    v,benign_AIIm1_ones;
941ca92afb2SStefano Zampini       ISLocalToGlobalMapping N_to_reor;
942ca92afb2SStefano Zampini       IS                     is_p0,is_p0_p;
9435cbda25cSStefano Zampini       PetscScalar            *cs_AIB,*AIIm1_data;
9445cbda25cSStefano Zampini       PetscInt               sizeA;
945ca92afb2SStefano Zampini 
946*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor));
947*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0));
948*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p));
949*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&is_p0));
950*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateVecs(A,&v,&benign_AIIm1_ones));
951*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecGetSize(v,&sizeA));
952*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat));
953*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat));
954*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDenseGetArray(cs_AIB_mat,&cs_AIB));
955*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
956*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscMalloc1(benign_n,&is_p_r));
957ca92afb2SStefano Zampini       /* compute colsum of A_IB restricted to pressures */
958ca92afb2SStefano Zampini       for (i=0;i<benign_n;i++) {
9597ebab0bbSStefano Zampini         const PetscScalar *array;
960ca92afb2SStefano Zampini         const PetscInt    *idxs;
961ca92afb2SStefano Zampini         PetscInt          j,nz;
962ca92afb2SStefano Zampini 
963*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i]));
964*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGetLocalSize(is_p_r[i],&nz));
965*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGetIndices(is_p_r[i],&idxs));
9665cbda25cSStefano Zampini         for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.;
967*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISRestoreIndices(is_p_r[i],&idxs));
968*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
969*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatMult(A,benign_AIIm1_ones,v));
970*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecResetArray(benign_AIIm1_ones));
971*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArrayRead(v,&array));
97222db5ddcSStefano Zampini         for (j=0;j<size_schur;j++) {
97322db5ddcSStefano Zampini #if defined(PETSC_USE_COMPLEX)
97422db5ddcSStefano Zampini           cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz));
97522db5ddcSStefano Zampini #else
97622db5ddcSStefano Zampini           cs_AIB[i*size_schur + j] = array[j+n_I]/nz;
97722db5ddcSStefano Zampini #endif
97822db5ddcSStefano Zampini         }
979*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArrayRead(v,&array));
980ca92afb2SStefano Zampini       }
981*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDenseRestoreArray(cs_AIB_mat,&cs_AIB));
982*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
983*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&v));
984*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&benign_AIIm1_ones));
985*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE));
986*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
987*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
988*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL));
989*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&is_p0_p));
990*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISLocalToGlobalMappingDestroy(&N_to_reor));
991ca92afb2SStefano Zampini     }
992*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric));
993*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian));
994*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetOption(A,MAT_SPD,sub_schurs->is_posdef));
995883469d8SStefano Zampini 
99611955456SStefano Zampini     /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
99711955456SStefano Zampini     use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
99811955456SStefano Zampini 
999683d3df6SStefano Zampini     /* when using the benign subspace trick, the local Schur complements are SPD */
100035d0533cSStefano Zampini     /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
100135d0533cSStefano Zampini        Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
100235d0533cSStefano Zampini     if (benign_trick) {
100335d0533cSStefano Zampini       sub_schurs->is_posdef = PETSC_TRUE;
1004*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg));
100535d0533cSStefano Zampini       if (flg) use_cholesky = PETSC_FALSE;
100635d0533cSStefano Zampini     }
1007d47842beSStefano Zampini 
1008f4f7d9d6SStefano Zampini     if (n_I) {
10090aa714b2SStefano Zampini       IS        is_schur;
10107ebab0bbSStefano Zampini       char      stype[64];
10114ba54290SStefano Zampini       PetscBool gpu = PETSC_FALSE;
10125a05ddb0SStefano Zampini 
101311955456SStefano Zampini       if (use_cholesky) {
1014*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F));
1015883469d8SStefano Zampini       } else {
1016*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F));
1017883469d8SStefano Zampini       }
1018*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetErrorIfFailure(A,PETSC_TRUE));
101935d0533cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
1020*5f80ce2aSJacob Faibussowitsch       if (benign_trick) CHKERRQ(MatMkl_PardisoSetCntl(F,10,10));
102135d0533cSStefano Zampini #endif
1022883469d8SStefano Zampini       /* subsets ordered last */
1023*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur));
1024*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatFactorSetSchurIS(F,is_schur));
1025*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&is_schur));
1026883469d8SStefano Zampini 
1027883469d8SStefano Zampini       /* factorization step */
102811955456SStefano Zampini       if (use_cholesky) {
1029*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCholeskyFactorSymbolic(F,A,NULL,NULL));
1030be83ff47SStefano Zampini #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1031*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatMumpsSetIcntl(F,19,2));
1032be83ff47SStefano Zampini #endif
1033*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCholeskyFactorNumeric(F,A,NULL));
1034a0b0af32SStefano Zampini         S_lower_triangular = PETSC_TRUE;
1035883469d8SStefano Zampini       } else {
1036*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatLUFactorSymbolic(F,A,NULL,NULL,NULL));
1037be83ff47SStefano Zampini #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1038*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatMumpsSetIcntl(F,19,3));
1039be83ff47SStefano Zampini #endif
1040*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatLUFactorNumeric(F,A,NULL));
1041883469d8SStefano Zampini       }
1042*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view"));
1043883469d8SStefano Zampini 
10443b03f7bbSStefano Zampini       if (matl_dbg_viewer) {
104511955456SStefano Zampini         Mat S;
104611955456SStefano Zampini         IS  is;
104711955456SStefano Zampini 
1048*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscObjectSetName((PetscObject)A,"A"));
1049*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatView(A,matl_dbg_viewer));
1050*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatFactorCreateSchurComplement(F,&S,NULL));
1051*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscObjectSetName((PetscObject)S,"S"));
1052*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatView(S,matl_dbg_viewer));
1053*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&S));
1054*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is));
1055*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscObjectSetName((PetscObject)is,"I"));
1056*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISView(is,matl_dbg_viewer));
1057*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISDestroy(&is));
1058*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is));
1059*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscObjectSetName((PetscObject)is,"B"));
1060*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISView(is,matl_dbg_viewer));
1061*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISDestroy(&is));
1062*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscObjectSetName((PetscObject)is_A_all,"IA"));
1063*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISView(is_A_all,matl_dbg_viewer));
106411955456SStefano Zampini       }
106511955456SStefano Zampini 
1066883469d8SStefano Zampini       /* get explicit Schur Complement computed during numeric factorization */
1067*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatFactorGetSchurComplement(F,&S_all,NULL));
1068*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscStrncpy(stype,MATSEQDENSE,sizeof(stype)));
10694ba54290SStefano Zampini #if defined(PETSC_HAVE_CUDA)
1070*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,""));
10714ba54290SStefano Zampini #endif
10727ebab0bbSStefano Zampini       if (gpu) {
1073*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype)));
10747ebab0bbSStefano Zampini       }
1075*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL));
1076*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all));
1077*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef));
1078*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian));
1079*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatGetType(S_all,&Stype));
1080b3cb21ddSStefano Zampini 
1081d62866d3SStefano Zampini       /* we can reuse the solvers if we are not using the economic version */
1082683d3df6SStefano Zampini       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1083683d3df6SStefano Zampini       factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
108403dfb2d7SStefano Zampini       if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur)
108503dfb2d7SStefano Zampini         reuse_solvers = factor_workaround = PETSC_FALSE;
108603dfb2d7SStefano Zampini 
1087df4d28bfSStefano Zampini       solver_S = PETSC_TRUE;
1088ca92afb2SStefano Zampini 
108972b8c272SStefano Zampini       /* update the Schur complement with the change of basis on the pressures */
1090ca92afb2SStefano Zampini       if (benign_n) {
10917ebab0bbSStefano Zampini         const PetscScalar *cs_AIB;
10927ebab0bbSStefano Zampini         PetscScalar       *S_data,*AIIm1_data;
10933b03f7bbSStefano Zampini         Mat               S2 = NULL,S3 = NULL; /* dbg */
10943b03f7bbSStefano Zampini         PetscScalar       *S2_data,*S3_data; /* dbg */
10957ebab0bbSStefano Zampini         Vec               v,benign_AIIm1_ones;
10965cbda25cSStefano Zampini         PetscInt          sizeA;
1097ca92afb2SStefano Zampini 
1098*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseGetArray(S_all,&S_data));
1099*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCreateVecs(A,&v,&benign_AIIm1_ones));
1100*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetSize(v,&sizeA));
1101ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MUMPS)
1102*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatMumpsSetIcntl(F,26,0));
1103ca92afb2SStefano Zampini #endif
1104ca92afb2SStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
1105*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatMkl_PardisoSetCntl(F,70,1));
1106ca92afb2SStefano Zampini #endif
1107*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB));
1108*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
11093b03f7bbSStefano Zampini         if (matl_dbg_viewer) {
1110*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2));
1111*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3));
1112*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseGetArray(S2,&S2_data));
1113*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseGetArray(S3,&S3_data));
11143b03f7bbSStefano Zampini         }
1115ca92afb2SStefano Zampini         for (i=0;i<benign_n;i++) {
11163b03f7bbSStefano Zampini           PetscScalar    *array,sum = 0.,one = 1.,*sums;
1117ca92afb2SStefano Zampini           const PetscInt *idxs;
11183b03f7bbSStefano Zampini           PetscInt       k,j,nz;
111947484b83SStefano Zampini           PetscBLASInt   B_k,B_n;
1120ca92afb2SStefano Zampini 
1121*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscCalloc1(benign_n,&sums));
1122*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
1123*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecCopy(benign_AIIm1_ones,v));
1124*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatSolve(F,v,benign_AIIm1_ones));
1125*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatMult(A,benign_AIIm1_ones,v));
1126*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecResetArray(benign_AIIm1_ones));
11273b03f7bbSStefano Zampini           /* p0 dofs (eliminated) are excluded from the sums */
11283b03f7bbSStefano Zampini           for (k=0;k<benign_n;k++) {
1129*5f80ce2aSJacob Faibussowitsch             CHKERRQ(ISGetLocalSize(is_p_r[k],&nz));
1130*5f80ce2aSJacob Faibussowitsch             CHKERRQ(ISGetIndices(is_p_r[k],&idxs));
11313b03f7bbSStefano Zampini             for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i];
1132*5f80ce2aSJacob Faibussowitsch             CHKERRQ(ISRestoreIndices(is_p_r[k],&idxs));
11333b03f7bbSStefano Zampini           }
1134*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecGetArrayRead(v,(const PetscScalar**)&array));
11353b03f7bbSStefano Zampini           if (matl_dbg_viewer) {
11363b03f7bbSStefano Zampini             Vec  vv;
11373b03f7bbSStefano Zampini             char name[16];
11383b03f7bbSStefano Zampini 
1139*5f80ce2aSJacob Faibussowitsch             CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv));
1140*5f80ce2aSJacob Faibussowitsch             CHKERRQ(PetscSNPrintf(name,sizeof(name),"Pvs%D",i));
1141*5f80ce2aSJacob Faibussowitsch             CHKERRQ(PetscObjectSetName((PetscObject)vv,name));
1142*5f80ce2aSJacob Faibussowitsch             CHKERRQ(VecView(vv,matl_dbg_viewer));
11433b03f7bbSStefano Zampini           }
114447484b83SStefano Zampini           /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
114547484b83SStefano Zampini           /* cs_AIB already scaled by 1./nz */
114647484b83SStefano Zampini           B_k = 1;
11473b03f7bbSStefano Zampini           for (k=0;k<benign_n;k++) {
11483b03f7bbSStefano Zampini             sum  = sums[k];
1149*5f80ce2aSJacob Faibussowitsch             CHKERRQ(PetscBLASIntCast(size_schur,&B_n));
11503b03f7bbSStefano Zampini 
11513b03f7bbSStefano Zampini             if (PetscAbsScalar(sum) == 0.0) continue;
11523b03f7bbSStefano Zampini             if (k == i) {
115347484b83SStefano Zampini               PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
11543b03f7bbSStefano Zampini               if (matl_dbg_viewer) {
11553b03f7bbSStefano Zampini                 PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
11563b03f7bbSStefano Zampini               }
11573b03f7bbSStefano Zampini             } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
11583b03f7bbSStefano Zampini               sum /= 2.0;
11593b03f7bbSStefano Zampini               PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
11603b03f7bbSStefano Zampini               if (matl_dbg_viewer) {
11613b03f7bbSStefano Zampini                 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
11623b03f7bbSStefano Zampini               }
11633b03f7bbSStefano Zampini             }
11643b03f7bbSStefano Zampini           }
11655cbda25cSStefano Zampini           sum  = 1.;
116647484b83SStefano Zampini           PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
11673b03f7bbSStefano Zampini           if (matl_dbg_viewer) {
11683b03f7bbSStefano Zampini             PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n));
11693b03f7bbSStefano Zampini           }
1170*5f80ce2aSJacob Faibussowitsch           CHKERRQ(VecRestoreArrayRead(v,(const PetscScalar**)&array));
11715cbda25cSStefano Zampini           /* set p0 entry of AIIm1_ones to zero */
1172*5f80ce2aSJacob Faibussowitsch           CHKERRQ(ISGetLocalSize(is_p_r[i],&nz));
1173*5f80ce2aSJacob Faibussowitsch           CHKERRQ(ISGetIndices(is_p_r[i],&idxs));
1174282d6408SStefano Zampini           for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.;
1175*5f80ce2aSJacob Faibussowitsch           CHKERRQ(ISRestoreIndices(is_p_r[i],&idxs));
1176*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscFree(sums));
11773b03f7bbSStefano Zampini         }
1178*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecDestroy(&benign_AIIm1_ones));
11793b03f7bbSStefano Zampini         if (matl_dbg_viewer) {
1180*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseRestoreArray(S2,&S2_data));
1181*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseRestoreArray(S3,&S3_data));
1182ca92afb2SStefano Zampini         }
1183a7414863SStefano Zampini         if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1184a7414863SStefano Zampini           PetscInt k,j;
1185a7414863SStefano Zampini           for (k=0;k<size_schur;k++) {
1186a7414863SStefano Zampini             for (j=k;j<size_schur;j++) {
1187a7414863SStefano Zampini               S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]);
1188a7414863SStefano Zampini             }
1189a7414863SStefano Zampini           }
1190a7414863SStefano Zampini         }
1191a7414863SStefano Zampini 
11925cbda25cSStefano Zampini         /* restore defaults */
11935cbda25cSStefano Zampini #if defined(PETSC_HAVE_MUMPS)
1194*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatMumpsSetIcntl(F,26,-1));
11955cbda25cSStefano Zampini #endif
11965cbda25cSStefano Zampini #if defined(PETSC_HAVE_MKL_PARDISO)
1197*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatMkl_PardisoSetCntl(F,70,0));
11985cbda25cSStefano Zampini #endif
1199*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB));
1200*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
1201*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecDestroy(&v));
1202*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseRestoreArray(S_all,&S_data));
12033b03f7bbSStefano Zampini         if (matl_dbg_viewer) {
12043b03f7bbSStefano Zampini           Mat S;
12053b03f7bbSStefano Zampini 
1206*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1207*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatFactorCreateSchurComplement(F,&S,NULL));
1208*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscObjectSetName((PetscObject)S,"Sb"));
1209*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatView(S,matl_dbg_viewer));
1210*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDestroy(&S));
1211*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscObjectSetName((PetscObject)S2,"S2P"));
1212*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatView(S2,matl_dbg_viewer));
1213*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscObjectSetName((PetscObject)S3,"S3P"));
1214*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatView(S3,matl_dbg_viewer));
1215*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscObjectSetName((PetscObject)cs_AIB_mat,"cs"));
1216*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatView(cs_AIB_mat,matl_dbg_viewer));
1217*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatFactorGetSchurComplement(F,&S_all,NULL));
12183b03f7bbSStefano Zampini         }
1219*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&S2));
1220*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&S3));
1221ca92afb2SStefano Zampini       }
1222a3df083aSStefano Zampini       if (!reuse_solvers) {
1223a3df083aSStefano Zampini         for (i=0;i<benign_n;i++) {
1224*5f80ce2aSJacob Faibussowitsch           CHKERRQ(ISDestroy(&is_p_r[i]));
1225a3df083aSStefano Zampini         }
1226*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscFree(is_p_r));
1227*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&cs_AIB_mat));
1228*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&benign_AIIm1_ones_mat));
1229a3df083aSStefano Zampini       }
1230df4d28bfSStefano Zampini     } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1231*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all));
1232*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatGetType(S_all,&Stype));
1233683d3df6SStefano Zampini       reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1234166598c1SStefano Zampini       factor_workaround = PETSC_FALSE;
1235df4d28bfSStefano Zampini       solver_S = PETSC_FALSE;
1236be83ff47SStefano Zampini     }
1237be83ff47SStefano Zampini 
1238be83ff47SStefano Zampini     if (reuse_solvers) {
1239a00504b5SStefano Zampini       Mat                A_II,Afake;
124053892102SStefano Zampini       Vec                vec1_B;
1241df4d28bfSStefano Zampini       PCBDDCReuseSolvers msolv_ctx;
12423462e049SStefano Zampini       PetscInt           n_R;
1243d5574798SStefano Zampini 
1244df4d28bfSStefano Zampini       if (sub_schurs->reuse_solver) {
1245*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1246e28d306cSStefano Zampini       } else {
1247*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscNew(&sub_schurs->reuse_solver));
1248d62866d3SStefano Zampini       }
1249df4d28bfSStefano Zampini       msolv_ctx = sub_schurs->reuse_solver;
1250*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL));
1251*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectReference((PetscObject)F));
1252d5574798SStefano Zampini       msolv_ctx->F = F;
1253*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateVecs(F,&msolv_ctx->sol,NULL));
1254683d3df6SStefano Zampini       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1255683d3df6SStefano Zampini       {
1256683d3df6SStefano Zampini         PetscScalar *array;
1257683d3df6SStefano Zampini         PetscInt    n;
1258683d3df6SStefano Zampini 
1259*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetLocalSize(msolv_ctx->sol,&n));
1260*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArray(msolv_ctx->sol,&array));
1261*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs));
1262*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArray(msolv_ctx->sol,&array));
1263683d3df6SStefano Zampini       }
12643fc34f97SStefano Zampini       msolv_ctx->has_vertices = schur_has_vertices;
1265d62866d3SStefano Zampini 
1266d62866d3SStefano Zampini       /* interior solver */
1267*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver));
1268*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCSetOperators(msolv_ctx->interior_solver,A_II,A_II));
1269*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCSetType(msolv_ctx->interior_solver,PCSHELL));
1270*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)"));
1271*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx));
1272*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1273*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior));
1274*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose));
1275d62866d3SStefano Zampini 
1276d62866d3SStefano Zampini       /* correction solver */
1277*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver));
1278*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCSetType(msolv_ctx->correction_solver,PCSHELL));
1279*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)"));
1280*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx));
1281*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1282*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction));
1283*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose));
128453892102SStefano Zampini 
128553892102SStefano Zampini       /* scatter and vecs for Schur complement solver */
1286*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B));
1287*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateVecs(sub_schurs->S,&vec1_B,NULL));
12883fc34f97SStefano Zampini       if (!schur_has_vertices) {
1289*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B));
1290*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B));
1291*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscObjectReference((PetscObject)is_A_all));
129253892102SStefano Zampini         msolv_ctx->is_R = is_A_all;
1293683d3df6SStefano Zampini       } else {
1294683d3df6SStefano Zampini         IS              is_B_all;
1295683d3df6SStefano Zampini         const PetscInt* idxs;
1296683d3df6SStefano Zampini         PetscInt        dual,n_v,n;
1297683d3df6SStefano Zampini 
1298*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
1299683d3df6SStefano Zampini         dual = size_schur - n_v;
1300*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGetLocalSize(is_A_all,&n));
1301*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGetIndices(is_A_all,&idxs));
1302*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all));
1303*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B));
1304*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISDestroy(&is_B_all));
1305*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all));
1306*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B));
1307*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISDestroy(&is_B_all));
1308*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R));
1309*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISRestoreIndices(is_A_all,&idxs));
1310683d3df6SStefano Zampini       }
1311*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(msolv_ctx->is_R,&n_R));
1312*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake));
1313*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY));
1314*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY));
1315*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PCSetOperators(msolv_ctx->correction_solver,Afake,Afake));
1316*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&Afake));
1317*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecDestroy(&vec1_B));
1318ca92afb2SStefano Zampini 
1319ca92afb2SStefano Zampini       /* communicate benign info to solver context */
1320ca92afb2SStefano Zampini       if (benign_n) {
13215cbda25cSStefano Zampini         PetscScalar *array;
13225cbda25cSStefano Zampini 
1323ca92afb2SStefano Zampini         msolv_ctx->benign_n = benign_n;
1324ca92afb2SStefano Zampini         msolv_ctx->benign_zerodiag_subs = is_p_r;
1325*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals));
13265cbda25cSStefano Zampini         msolv_ctx->benign_csAIB = cs_AIB_mat;
1327*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL));
1328*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArray(msolv_ctx->benign_corr_work,&array));
1329*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec));
1330*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArray(msolv_ctx->benign_corr_work,&array));
13315cbda25cSStefano Zampini         msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1332ca92afb2SStefano Zampini       }
1333ada6e2d7SStefano Zampini     } else {
1334ada6e2d7SStefano Zampini       if (sub_schurs->reuse_solver) {
1335*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1336ada6e2d7SStefano Zampini       }
1337*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscFree(sub_schurs->reuse_solver));
1338d5574798SStefano Zampini     }
1339*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDestroy(&A));
1340*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&is_A_all));
13415db18549SStefano Zampini 
1342be83ff47SStefano Zampini     /* Work arrays */
1343*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscMalloc1(max_subset_size*max_subset_size,&work));
1344d2627357SStefano Zampini 
1345be83ff47SStefano Zampini     /* S_Ej_all */
1346be83ff47SStefano Zampini     cum = cum2 = 0;
1347*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDenseGetArrayRead(S_all,&rS_data));
1348*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr));
134904c5b2e6SStefano Zampini     if (compute_Stilda) {
1350*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
135104c5b2e6SStefano Zampini     }
135265d8bf0aSStefano Zampini     for (i=0;i<sub_schurs->n_subs;i++) {
135365d8bf0aSStefano Zampini       PetscInt j;
135465d8bf0aSStefano Zampini 
13555a95e1ceSStefano Zampini       /* get S_E */
1356*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1357683d3df6SStefano Zampini       if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1358be83ff47SStefano Zampini         PetscInt k;
1359be83ff47SStefano Zampini         for (k=0;k<subset_size;k++) {
1360be83ff47SStefano Zampini           for (j=k;j<subset_size;j++) {
13611683a169SBarry Smith             work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
13621683a169SBarry Smith             work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]);
1363be83ff47SStefano Zampini           }
1364be83ff47SStefano Zampini         }
136506a4e24aSStefano Zampini       } else { /* just copy to workspace */
1366be83ff47SStefano Zampini         PetscInt k;
1367be83ff47SStefano Zampini         for (k=0;k<subset_size;k++) {
1368be83ff47SStefano Zampini           for (j=0;j<subset_size;j++) {
13691683a169SBarry Smith             work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1370be83ff47SStefano Zampini           }
1371be83ff47SStefano Zampini         }
13729087bf02SStefano Zampini       }
13735a95e1ceSStefano Zampini       /* insert S_E values */
1374b7ab4a40SStefano Zampini       if (sub_schurs->change) {
13758760537fSStefano Zampini         Mat change_sub,SEj,T;
137672b8c272SStefano Zampini 
137772b8c272SStefano Zampini         /* change basis */
1378*5f80ce2aSJacob Faibussowitsch         CHKERRQ(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1379*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
13808760537fSStefano Zampini         if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
13818760537fSStefano Zampini           Mat T2;
1382*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1383*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1384*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
1385*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDestroy(&T2));
13868760537fSStefano Zampini         } else {
1387*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
138872b8c272SStefano Zampini         }
1389*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1390*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&T));
1391*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL));
1392*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&SEj));
139372b8c272SStefano Zampini       }
13949d54b7f4SStefano Zampini       if (deluxe) {
1395*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
1396683d3df6SStefano Zampini         /* if adaptivity is requested, invert S_E blocks */
1397862806e4SStefano Zampini         if (compute_Stilda) {
13987ebab0bbSStefano Zampini           Mat               M;
13997ebab0bbSStefano Zampini           const PetscScalar *vals;
14007ebab0bbSStefano Zampini           PetscBool         isdense,isdensecuda;
1401f4f7d9d6SStefano Zampini 
1402*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M));
1403*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatSetOption(M,MAT_SPD,sub_schurs->is_posdef));
1404*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian));
14057ebab0bbSStefano Zampini           if (!PetscBTLookup(sub_schurs->is_edge,i)) {
1406*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatSetType(M,Stype));
14076c3e6151SStefano Zampini           }
1408*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense));
1409*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda));
14107ebab0bbSStefano Zampini           if (use_cholesky) {
1411*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatCholeskyFactor(M,NULL,NULL));
1412d6462365SStefano Zampini           } else {
1413*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatLUFactor(M,NULL,NULL,NULL));
14142972d61bSStefano Zampini           }
14157ebab0bbSStefano Zampini           if (isdense) {
1416*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatSeqDenseInvertFactors_Private(M));
14177ebab0bbSStefano Zampini #if defined(PETSC_HAVE_CUDA)
14187ebab0bbSStefano Zampini           } else if (isdensecuda) {
1419*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatSeqDenseCUDAInvertFactors_Private(M));
14207ebab0bbSStefano Zampini #endif
142198921bdaSJacob Faibussowitsch           } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype);
1422*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseGetArrayRead(M,&vals));
1423*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size));
1424*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseRestoreArrayRead(M,&vals));
1425*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDestroy(&M));
14269087bf02SStefano Zampini         }
14279d54b7f4SStefano Zampini       } else if (compute_Stilda) { /* not using deluxe */
14289d54b7f4SStefano Zampini         Mat         SEj;
14299d54b7f4SStefano Zampini         Vec         D;
14309d54b7f4SStefano Zampini         PetscScalar *array;
14319d54b7f4SStefano Zampini 
1432*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1433*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecGetArray(Dall,&array));
1434*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D));
1435*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecRestoreArray(Dall,&array));
1436*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecShift(D,-1.));
1437*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDiagonalScale(SEj,D,D));
1438*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&SEj));
1439*5f80ce2aSJacob Faibussowitsch         CHKERRQ(VecDestroy(&D));
1440*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
14419d54b7f4SStefano Zampini       }
1442be83ff47SStefano Zampini       cum += subset_size;
1443be83ff47SStefano Zampini       cum2 += subset_size*(size_schur + 1);
144404c5b2e6SStefano Zampini       SEj_arr += subset_size*subset_size;
144504c5b2e6SStefano Zampini       if (SEjinv_arr) SEjinv_arr += subset_size*subset_size;
1446be83ff47SStefano Zampini     }
1447*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDenseRestoreArrayRead(S_all,&rS_data));
1448*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr));
144904c5b2e6SStefano Zampini     if (compute_Stilda) {
1450*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
145104c5b2e6SStefano Zampini     }
1452df4d28bfSStefano Zampini     if (solver_S) {
1453*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
14544a6c6b0dSStefano Zampini     }
1455683d3df6SStefano Zampini 
14567ebab0bbSStefano Zampini     /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
14577ebab0bbSStefano Zampini        however, preliminary tests indicate using GPUs is still faster in the solve phase */
14587ebab0bbSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
14597ebab0bbSStefano Zampini     if (reuse_solvers) {
14607ebab0bbSStefano Zampini       Mat                  St;
14617ebab0bbSStefano Zampini       MatFactorSchurStatus st;
14627ebab0bbSStefano Zampini 
146335d0533cSStefano Zampini       flg  = PETSC_FALSE;
1464*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL));
1465*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatFactorGetSchurComplement(F,&St,&st));
1466*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatBindToCPU(St,flg));
1467*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatFactorRestoreSchurComplement(F,&St,st));
14687ebab0bbSStefano Zampini     }
14697ebab0bbSStefano Zampini #endif
14707ebab0bbSStefano Zampini 
1471683d3df6SStefano Zampini     schur_factor = NULL;
147245951f25SStefano Zampini     if (compute_Stilda && size_active_schur) {
1473683d3df6SStefano Zampini 
1474*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
14759d54b7f4SStefano Zampini       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1476*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscArraycpy(SEjinv_arr,work,size_schur*size_schur));
14774a6c6b0dSStefano Zampini       } else {
1478683d3df6SStefano Zampini         Mat S_all_inv=NULL;
14797ebab0bbSStefano Zampini 
14803fc34f97SStefano Zampini         if (solver_S) {
1481683d3df6SStefano Zampini           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1482683d3df6SStefano Zampini              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
14833fc34f97SStefano Zampini           if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1484683d3df6SStefano Zampini             PetscScalar *data;
1485683d3df6SStefano Zampini             PetscInt     nd = 0;
14866dba178dSStefano Zampini 
1487f4f7d9d6SStefano Zampini             if (!use_potr) {
1488683d3df6SStefano Zampini               SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1489683d3df6SStefano Zampini             }
1490*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1491*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatDenseGetArray(S_all_inv,&data));
1492683d3df6SStefano Zampini             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1493*5f80ce2aSJacob Faibussowitsch               CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&nd));
1494683d3df6SStefano Zampini             }
14953fc34f97SStefano Zampini 
14963fc34f97SStefano Zampini             /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
14973fc34f97SStefano Zampini             if (schur_has_vertices) {
14983fc34f97SStefano Zampini               Mat          M;
14993fc34f97SStefano Zampini               PetscScalar *tdata;
15003fc34f97SStefano Zampini               PetscInt     nv = 0, news;
15013fc34f97SStefano Zampini 
1502*5f80ce2aSJacob Faibussowitsch               CHKERRQ(ISGetLocalSize(sub_schurs->is_vertices,&nv));
15033fc34f97SStefano Zampini               news = size_active_schur + nv;
1504*5f80ce2aSJacob Faibussowitsch               CHKERRQ(PetscCalloc1(news*news,&tdata));
1505683d3df6SStefano Zampini               for (i=0;i<size_active_schur;i++) {
1506*5f80ce2aSJacob Faibussowitsch                 CHKERRQ(PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i));
1507*5f80ce2aSJacob Faibussowitsch                 CHKERRQ(PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv));
15083fc34f97SStefano Zampini               }
15093fc34f97SStefano Zampini               for (i=0;i<nv;i++) {
15103fc34f97SStefano Zampini                 PetscInt k = i+size_active_schur;
1511*5f80ce2aSJacob Faibussowitsch                 CHKERRQ(PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i));
15123fc34f97SStefano Zampini               }
15133fc34f97SStefano Zampini 
1514*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M));
1515*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1516*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatCholeskyFactor(M,NULL,NULL));
15173fc34f97SStefano Zampini               /* save the factors */
15183fc34f97SStefano Zampini               cum = 0;
1519*5f80ce2aSJacob Faibussowitsch               CHKERRQ(PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor));
15203fc34f97SStefano Zampini               for (i=0;i<size_active_schur;i++) {
1521*5f80ce2aSJacob Faibussowitsch                 CHKERRQ(PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i));
1522683d3df6SStefano Zampini                 cum += size_active_schur - i;
1523683d3df6SStefano Zampini               }
15243fc34f97SStefano Zampini               for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)]));
1525*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatSeqDenseInvertFactors_Private(M));
15263fc34f97SStefano Zampini               /* move back just the active dofs to the Schur complement */
15273fc34f97SStefano Zampini               for (i=0;i<size_active_schur;i++) {
1528*5f80ce2aSJacob Faibussowitsch                 CHKERRQ(PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur));
15293fc34f97SStefano Zampini               }
1530*5f80ce2aSJacob Faibussowitsch               CHKERRQ(PetscFree(tdata));
1531*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatDestroy(&M));
15323fc34f97SStefano Zampini             } else { /* we can factorize and invert just the activedofs */
15333fc34f97SStefano Zampini               Mat         M;
15345002105bSStefano Zampini               PetscScalar *aux;
15353fc34f97SStefano Zampini 
1536*5f80ce2aSJacob Faibussowitsch               CHKERRQ(PetscMalloc1(nd,&aux));
15375002105bSStefano Zampini               for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)];
1538*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M));
1539*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatDenseSetLDA(M,size_schur));
1540*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1541*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatCholeskyFactor(M,NULL,NULL));
1542*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatSeqDenseInvertFactors_Private(M));
1543*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatDestroy(&M));
1544*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M));
1545*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatZeroEntries(M));
1546*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatDestroy(&M));
1547*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M));
1548*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatDenseSetLDA(M,size_schur));
1549*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatZeroEntries(M));
1550*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatDestroy(&M));
15515002105bSStefano Zampini               for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i];
1552*5f80ce2aSJacob Faibussowitsch               CHKERRQ(PetscFree(aux));
15533fc34f97SStefano Zampini             }
1554*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatDenseRestoreArray(S_all_inv,&data));
15553fc34f97SStefano Zampini           } else { /* use MatFactor calls to invert S */
1556*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatFactorInvertSchurComplement(F));
1557*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1558683d3df6SStefano Zampini           }
1559df4d28bfSStefano Zampini         } else { /* we need to invert explicitly since we are not using MatFactor for S */
1560*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscObjectReference((PetscObject)S_all));
1561683d3df6SStefano Zampini           S_all_inv = S_all;
1562*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseGetArray(S_all_inv,&S_data));
1563*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscBLASIntCast(size_schur,&B_N));
1564*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1565f4f7d9d6SStefano Zampini           if (use_potr) {
1566be83ff47SStefano Zampini             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
15672c71b3e2SJacob Faibussowitsch             PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1568be83ff47SStefano Zampini             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
15692c71b3e2SJacob Faibussowitsch             PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1570f4f7d9d6SStefano Zampini           } else if (use_sytr) {
1571f4f7d9d6SStefano Zampini             PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
15722c71b3e2SJacob Faibussowitsch             PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1573f4f7d9d6SStefano Zampini             PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr));
15742c71b3e2SJacob Faibussowitsch             PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1575d6462365SStefano Zampini           } else {
1576d6462365SStefano Zampini             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
15772c71b3e2SJacob Faibussowitsch             PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1578d6462365SStefano Zampini             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
15792c71b3e2SJacob Faibussowitsch             PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1580be83ff47SStefano Zampini           }
1581*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscLogFlops(1.0*size_schur*size_schur*size_schur));
1582*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscFPTrapPop());
1583*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseRestoreArray(S_all_inv,&S_data));
1584be83ff47SStefano Zampini         }
1585be83ff47SStefano Zampini         /* S_Ej_tilda_all */
1586be83ff47SStefano Zampini         cum = cum2 = 0;
1587*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseGetArrayRead(S_all_inv,&rS_data));
1588be83ff47SStefano Zampini         for (i=0;i<sub_schurs->n_subs;i++) {
1589be83ff47SStefano Zampini           PetscInt j;
1590862806e4SStefano Zampini 
1591*5f80ce2aSJacob Faibussowitsch           CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1592be83ff47SStefano Zampini           /* get (St^-1)_E */
159372b8c272SStefano Zampini           /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
159406a4e24aSStefano Zampini              will be properly accessed later during adaptive selection */
1595a0b0af32SStefano Zampini           if (S_lower_triangular) {
1596be83ff47SStefano Zampini             PetscInt k;
1597b7ab4a40SStefano Zampini             if (sub_schurs->change) {
1598be83ff47SStefano Zampini               for (k=0;k<subset_size;k++) {
1599be83ff47SStefano Zampini                 for (j=k;j<subset_size;j++) {
16001683a169SBarry Smith                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
16016c3e6151SStefano Zampini                   work[j*subset_size+k] = work[k*subset_size+j];
1602be83ff47SStefano Zampini                 }
1603be83ff47SStefano Zampini               }
160472b8c272SStefano Zampini             } else {
160572b8c272SStefano Zampini               for (k=0;k<subset_size;k++) {
160672b8c272SStefano Zampini                 for (j=k;j<subset_size;j++) {
16071683a169SBarry Smith                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
160872b8c272SStefano Zampini                 }
160972b8c272SStefano Zampini               }
161072b8c272SStefano Zampini             }
161172b8c272SStefano Zampini           } else {
1612be83ff47SStefano Zampini             PetscInt k;
1613be83ff47SStefano Zampini             for (k=0;k<subset_size;k++) {
1614be83ff47SStefano Zampini               for (j=0;j<subset_size;j++) {
16151683a169SBarry Smith                 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1616be83ff47SStefano Zampini               }
1617be83ff47SStefano Zampini             }
1618be83ff47SStefano Zampini           }
1619b7ab4a40SStefano Zampini           if (sub_schurs->change) {
16208760537fSStefano Zampini             Mat change_sub,SEj,T;
162172b8c272SStefano Zampini 
162272b8c272SStefano Zampini             /* change basis */
1623*5f80ce2aSJacob Faibussowitsch             CHKERRQ(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1624*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
16258760537fSStefano Zampini             if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
16268760537fSStefano Zampini               Mat T2;
1627*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1628*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1629*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatDestroy(&T2));
1630*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
16318760537fSStefano Zampini             } else {
1632*5f80ce2aSJacob Faibussowitsch               CHKERRQ(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
163372b8c272SStefano Zampini             }
1634*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1635*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatDestroy(&T));
163672b8c272SStefano Zampini             /* set diagonal entry to a very large value to pick the basis we are eliminating as the first eigenvectors with adaptive selection */
1637*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1./PETSC_SMALL,NULL,NULL));
1638*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatDestroy(&SEj));
163972b8c272SStefano Zampini           }
1640*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscArraycpy(SEjinv_arr,work,subset_size*subset_size));
1641be83ff47SStefano Zampini           cum += subset_size;
1642be83ff47SStefano Zampini           cum2 += subset_size*(size_schur + 1);
164304c5b2e6SStefano Zampini           SEjinv_arr += subset_size*subset_size;
1644883469d8SStefano Zampini         }
1645*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDenseRestoreArrayRead(S_all_inv,&rS_data));
1646df4d28bfSStefano Zampini         if (solver_S) {
16473fc34f97SStefano Zampini           if (schur_has_vertices) {
1648*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED));
16493fc34f97SStefano Zampini           } else {
1650*5f80ce2aSJacob Faibussowitsch             CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED));
16515db18549SStefano Zampini           }
16523fc34f97SStefano Zampini         }
1653*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatDestroy(&S_all_inv));
1654683d3df6SStefano Zampini       }
1655*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1656683d3df6SStefano Zampini 
16573fc34f97SStefano Zampini       /* move back factors if needed */
16583fc34f97SStefano Zampini       if (schur_has_vertices) {
1659683d3df6SStefano Zampini         Mat      S_tmp;
16603fc34f97SStefano Zampini         PetscInt nd = 0;
1661683d3df6SStefano Zampini 
16622c71b3e2SJacob Faibussowitsch         PetscCheckFalse(!solver_S,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
1663*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatFactorGetSchurComplement(F,&S_tmp,NULL));
1664f4f7d9d6SStefano Zampini         if (use_potr) {
1665683d3df6SStefano Zampini           PetscScalar *data;
1666683d3df6SStefano Zampini 
1667*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseGetArray(S_tmp,&data));
1668*5f80ce2aSJacob Faibussowitsch           CHKERRQ(PetscArrayzero(data,size_schur*size_schur));
1669683d3df6SStefano Zampini 
1670683d3df6SStefano Zampini           if (S_lower_triangular) {
1671683d3df6SStefano Zampini             cum = 0;
1672683d3df6SStefano Zampini             for (i=0;i<size_active_schur;i++) {
1673*5f80ce2aSJacob Faibussowitsch               CHKERRQ(PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i));
1674683d3df6SStefano Zampini               cum += size_active_schur-i;
1675683d3df6SStefano Zampini             }
1676683d3df6SStefano Zampini           } else {
1677*5f80ce2aSJacob Faibussowitsch             CHKERRQ(PetscArraycpy(data,schur_factor,size_schur*size_schur));
1678683d3df6SStefano Zampini           }
1679683d3df6SStefano Zampini           if (sub_schurs->is_dir) {
1680*5f80ce2aSJacob Faibussowitsch             CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&nd));
1681683d3df6SStefano Zampini             for (i=0;i<nd;i++) {
1682683d3df6SStefano Zampini               data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1683683d3df6SStefano Zampini             }
1684683d3df6SStefano Zampini           }
16856dba178dSStefano Zampini           /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1686683d3df6SStefano Zampini              set the diagonal entry of the Schur factor to a very large value */
1687683d3df6SStefano Zampini           for (i=size_active_schur+nd;i<size_schur;i++) {
16886c3e6151SStefano Zampini             data[i*(size_schur+1)] = infty;
1689683d3df6SStefano Zampini           }
1690*5f80ce2aSJacob Faibussowitsch           CHKERRQ(MatDenseRestoreArray(S_tmp,&data));
16916542c05fSStefano Zampini         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1692*5f80ce2aSJacob Faibussowitsch         CHKERRQ(MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED));
16939087bf02SStefano Zampini       }
1694367aa537SStefano Zampini     } else if (factor_workaround) { /* we need to eliminate any unneeded coupling */
1695367aa537SStefano Zampini       PetscScalar *data;
1696367aa537SStefano Zampini       PetscInt    nd = 0;
1697367aa537SStefano Zampini 
1698367aa537SStefano Zampini       if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1699*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGetLocalSize(sub_schurs->is_dir,&nd));
1700367aa537SStefano Zampini       }
1701*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatFactorGetSchurComplement(F,&S_all,NULL));
1702*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDenseGetArray(S_all,&data));
1703367aa537SStefano Zampini       for (i=0;i<size_active_schur;i++) {
1704*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
1705367aa537SStefano Zampini       }
1706367aa537SStefano Zampini       for (i=size_active_schur+nd;i<size_schur;i++) {
1707*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
17086c3e6151SStefano Zampini         data[i*(size_schur+1)] = infty;
1709367aa537SStefano Zampini       }
1710*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDenseRestoreArray(S_all,&data));
1711*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
17124a6c6b0dSStefano Zampini     }
1713*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(work));
1714*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(schur_factor));
1715*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecDestroy(&Dall));
17164a6c6b0dSStefano Zampini   }
1717*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&is_I_layer));
1718*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&S_all));
1719*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&A_BB));
1720*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&A_IB));
1721*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&A_BI));
1722*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&F));
17236afe12f5SStefano Zampini 
1724*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(sub_schurs->n_subs,&nnz));
17256afe12f5SStefano Zampini   for (i=0;i<sub_schurs->n_subs;i++) {
1726*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i]));
17276afe12f5SStefano Zampini   }
1728*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer));
1729*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz));
1730*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
1731*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
17325a95e1ceSStefano Zampini   if (compute_Stilda) {
1733*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz));
1734*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
1735*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
17369d54b7f4SStefano Zampini     if (deluxe) {
1737*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz));
1738*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
1739*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
174008122e43SStefano Zampini     }
17419d54b7f4SStefano Zampini   }
1742*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&is_I_layer));
17436afe12f5SStefano Zampini 
17445db18549SStefano Zampini   /* Get local part of (\sum_j S_Ej) */
174541fd5443SStefano Zampini   if (!sub_schurs->sum_S_Ej_all) {
1746*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all));
174741fd5443SStefano Zampini   }
1748*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecSet(gstash,0.0));
1749*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray));
1750*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecPlaceArray(lstash,stasharray));
1751*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1752*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1753*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray));
1754*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecResetArray(lstash));
1755*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray));
1756*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecPlaceArray(lstash,stasharray));
1757*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1758*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1759*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray));
1760*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecResetArray(lstash));
176108122e43SStefano Zampini 
1762f6f667cfSStefano Zampini   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
17635a95e1ceSStefano Zampini   if (compute_Stilda) {
1764*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecSet(gstash,0.0));
1765*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1766*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecPlaceArray(lstash,stasharray));
1767*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1768*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1769*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1770*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1771*5f80ce2aSJacob Faibussowitsch     CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1772*5f80ce2aSJacob Faibussowitsch     CHKERRQ(VecResetArray(lstash));
17739d54b7f4SStefano Zampini     if (deluxe) {
1774*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecSet(gstash,0.0));
1775*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1776*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecPlaceArray(lstash,stasharray));
1777*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1778*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1779*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1780*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1781*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1782*5f80ce2aSJacob Faibussowitsch       CHKERRQ(VecResetArray(lstash));
17839d54b7f4SStefano Zampini     } else {
17849d54b7f4SStefano Zampini       PetscScalar *array;
17859d54b7f4SStefano Zampini       PetscInt    cum;
17869d54b7f4SStefano Zampini 
1787*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array));
17889d54b7f4SStefano Zampini       cum = 0;
17899d54b7f4SStefano Zampini       for (i=0;i<sub_schurs->n_subs;i++) {
1790*5f80ce2aSJacob Faibussowitsch         CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1791*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscBLASIntCast(subset_size,&B_N));
1792*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1793f4f7d9d6SStefano Zampini         if (use_potr) {
17949d54b7f4SStefano Zampini           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
17952c71b3e2SJacob Faibussowitsch           PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
17969d54b7f4SStefano Zampini           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
17972c71b3e2SJacob Faibussowitsch           PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1798f4f7d9d6SStefano Zampini         } else if (use_sytr) {
1799f4f7d9d6SStefano Zampini           PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
18002c71b3e2SJacob Faibussowitsch           PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1801f4f7d9d6SStefano Zampini           PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr));
18022c71b3e2SJacob Faibussowitsch           PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1803f4f7d9d6SStefano Zampini         } else {
1804f4f7d9d6SStefano Zampini           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
18052c71b3e2SJacob Faibussowitsch           PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1806f4f7d9d6SStefano Zampini           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
18072c71b3e2SJacob Faibussowitsch           PetscCheckFalse(B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1808f4f7d9d6SStefano Zampini         }
1809*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscLogFlops(1.0*subset_size*subset_size*subset_size));
1810*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscFPTrapPop());
18119d54b7f4SStefano Zampini         cum += subset_size*subset_size;
18129d54b7f4SStefano Zampini       }
1813*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array));
1814*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
1815*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
18169d54b7f4SStefano Zampini       sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
18179d54b7f4SStefano Zampini     }
181808122e43SStefano Zampini   }
1819*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&lstash));
1820*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecDestroy(&gstash));
1821*5f80ce2aSJacob Faibussowitsch   CHKERRQ(VecScatterDestroy(&sstash));
182257a87bf3SStefano Zampini 
18233b03f7bbSStefano Zampini   if (matl_dbg_viewer) {
182411955456SStefano Zampini     PetscInt cum;
182511955456SStefano Zampini 
182611955456SStefano Zampini     if (sub_schurs->S_Ej_all) {
1827*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE"));
1828*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatView(sub_schurs->S_Ej_all,matl_dbg_viewer));
182911955456SStefano Zampini     }
183011955456SStefano Zampini     if (sub_schurs->sum_S_Ej_all) {
1831*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE"));
1832*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer));
183311955456SStefano Zampini     }
183411955456SStefano Zampini     if (sub_schurs->sum_S_Ej_inv_all) {
1835*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm"));
1836*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer));
183711955456SStefano Zampini     }
183811955456SStefano Zampini     if (sub_schurs->sum_S_Ej_tilda_all) {
1839*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt"));
1840*5f80ce2aSJacob Faibussowitsch       CHKERRQ(MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer));
184111955456SStefano Zampini     }
184211955456SStefano Zampini     for (i=0,cum=0;i<sub_schurs->n_subs;i++) {
184311955456SStefano Zampini       IS   is;
184411955456SStefano Zampini       char name[16];
184511955456SStefano Zampini 
1846*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscSNPrintf(name,sizeof(name),"IE%D",i));
1847*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1848*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is));
1849*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectSetName((PetscObject)is,name));
1850*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISView(is,matl_dbg_viewer));
1851*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&is));
185211955456SStefano Zampini       cum += subset_size;
185311955456SStefano Zampini     }
185411955456SStefano Zampini   }
18553202ece2SStefano Zampini 
18565a95e1ceSStefano Zampini   /* free workspace */
1857*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscViewerDestroy(&matl_dbg_viewer));
1858*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree2(Bwork,pivots));
1859*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscCommDestroy(&comm_n));
1860b1b3d7a2SStefano Zampini   PetscFunctionReturn(0);
1861b1b3d7a2SStefano Zampini }
1862b1b3d7a2SStefano Zampini 
186388113c35SStefano Zampini PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc)
1864b1b3d7a2SStefano Zampini {
18659bb4a8caSStefano Zampini   IS              *faces,*edges,*all_cc,vertices;
18665a95e1ceSStefano Zampini   PetscInt        i,n_faces,n_edges,n_all_cc;
1867365a3a41SStefano Zampini   PetscBool       is_sorted,ispardiso,ismumps;
1868b1b3d7a2SStefano Zampini   PetscErrorCode  ierr;
1869b1b3d7a2SStefano Zampini 
1870b1b3d7a2SStefano Zampini   PetscFunctionBegin;
1871*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISSorted(is_I,&is_sorted));
18722c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!is_sorted,PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1873*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISSorted(is_B,&is_sorted));
18742c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!is_sorted,PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1875b1b3d7a2SStefano Zampini 
1876b1b3d7a2SStefano Zampini   /* reset any previous data */
1877*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCBDDCSubSchursReset(sub_schurs));
1878b1b3d7a2SStefano Zampini 
18795a95e1ceSStefano Zampini   /* get index sets for faces and edges (already sorted by global ordering) */
1880*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
1881b1b3d7a2SStefano Zampini   n_all_cc = n_faces+n_edges;
1882*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBTCreate(n_all_cc,&sub_schurs->is_edge));
1883*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscMalloc1(n_all_cc,&all_cc));
1884b1b3d7a2SStefano Zampini   for (i=0;i<n_faces;i++) {
18858b6046baSStefano Zampini     if (copycc) {
1886*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(faces[i],&all_cc[i]));
18878b6046baSStefano Zampini     } else {
1888*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectReference((PetscObject)faces[i]));
1889b1b3d7a2SStefano Zampini       all_cc[i] = faces[i];
1890b1b3d7a2SStefano Zampini     }
18918b6046baSStefano Zampini   }
1892b1b3d7a2SStefano Zampini   for (i=0;i<n_edges;i++) {
18938b6046baSStefano Zampini     if (copycc) {
1894*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDuplicate(edges[i],&all_cc[n_faces+i]));
18958b6046baSStefano Zampini     } else {
1896*5f80ce2aSJacob Faibussowitsch       CHKERRQ(PetscObjectReference((PetscObject)edges[i]));
1897b1b3d7a2SStefano Zampini       all_cc[n_faces+i] = edges[i];
18988b6046baSStefano Zampini     }
1899*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscBTSet(sub_schurs->is_edge,n_faces+i));
1900b1b3d7a2SStefano Zampini   }
1901*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectReference((PetscObject)vertices));
1902c8272957SStefano Zampini   sub_schurs->is_vertices = vertices;
1903*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
1904d62866d3SStefano Zampini   sub_schurs->is_dir = NULL;
1905*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir));
1906b1b3d7a2SStefano Zampini 
1907df4d28bfSStefano Zampini   /* Determine if MatFactor can be used */
1908*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrallocpy(prefix,&sub_schurs->prefix));
1909883469d8SStefano Zampini #if defined(PETSC_HAVE_MUMPS)
1910*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type)));
191188113c35SStefano Zampini #elif defined(PETSC_HAVE_MKL_PARDISO)
1912*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type)));
191388113c35SStefano Zampini #else
1914*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type)));
1915df4d28bfSStefano Zampini #endif
191688113c35SStefano Zampini #if defined(PETSC_USE_COMPLEX)
191788113c35SStefano Zampini   sub_schurs->is_hermitian  = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
191888113c35SStefano Zampini #else
191988113c35SStefano Zampini   sub_schurs->is_hermitian  = PETSC_TRUE;
1920883469d8SStefano Zampini #endif
192188113c35SStefano Zampini   sub_schurs->is_posdef     = PETSC_TRUE;
192211955456SStefano Zampini   sub_schurs->is_symmetric  = PETSC_TRUE;
19237f9db97bSStefano Zampini   sub_schurs->debug         = PETSC_FALSE;
1924991c41b4SStefano Zampini   sub_schurs->restrict_comm = PETSC_FALSE;
192588113c35SStefano Zampini   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr);
1926*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL));
1927*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL));
1928*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL));
1929*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL));
1930*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL));
1931*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL));
193288113c35SStefano Zampini   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1933*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps));
1934*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso));
1935365a3a41SStefano Zampini   sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
1936b1b3d7a2SStefano Zampini 
193711955456SStefano Zampini   /* for reals, symmetric and hermitian are synonims */
193811955456SStefano Zampini #if !defined(PETSC_USE_COMPLEX)
193911955456SStefano Zampini   sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
194011955456SStefano Zampini   sub_schurs->is_hermitian = sub_schurs->is_symmetric;
194111955456SStefano Zampini #endif
194211955456SStefano Zampini 
1943*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectReference((PetscObject)is_I));
1944b1b3d7a2SStefano Zampini   sub_schurs->is_I = is_I;
1945*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectReference((PetscObject)is_B));
1946b1b3d7a2SStefano Zampini   sub_schurs->is_B = is_B;
1947*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectReference((PetscObject)graph->l2gmap));
19485db18549SStefano Zampini   sub_schurs->l2gmap = graph->l2gmap;
1949*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscObjectReference((PetscObject)BtoNmap));
19505db18549SStefano Zampini   sub_schurs->BtoNmap = BtoNmap;
19515a95e1ceSStefano Zampini   sub_schurs->n_subs = n_all_cc;
1952b1b3d7a2SStefano Zampini   sub_schurs->is_subs = all_cc;
1953b96c3477SStefano Zampini   sub_schurs->S_Ej_all = NULL;
1954b96c3477SStefano Zampini   sub_schurs->sum_S_Ej_all = NULL;
195508122e43SStefano Zampini   sub_schurs->sum_S_Ej_inv_all = NULL;
1956b96c3477SStefano Zampini   sub_schurs->sum_S_Ej_tilda_all = NULL;
1957b96c3477SStefano Zampini   sub_schurs->is_Ej_all = NULL;
1958b1b3d7a2SStefano Zampini   PetscFunctionReturn(0);
1959b1b3d7a2SStefano Zampini }
1960b1b3d7a2SStefano Zampini 
196134a97f8cSStefano Zampini PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
196234a97f8cSStefano Zampini {
196334a97f8cSStefano Zampini   PCBDDCSubSchurs schurs_ctx;
196434a97f8cSStefano Zampini 
196534a97f8cSStefano Zampini   PetscFunctionBegin;
1966*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscNew(&schurs_ctx));
19675ff63025SStefano Zampini   schurs_ctx->n_subs = 0;
196834a97f8cSStefano Zampini   *sub_schurs = schurs_ctx;
196934a97f8cSStefano Zampini   PetscFunctionReturn(0);
197034a97f8cSStefano Zampini }
197134a97f8cSStefano Zampini 
197234a97f8cSStefano Zampini PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
197334a97f8cSStefano Zampini {
197434a97f8cSStefano Zampini   PetscInt       i;
197534a97f8cSStefano Zampini 
197634a97f8cSStefano Zampini   PetscFunctionBegin;
1977aea80f77Sstefano_zampini   if (!sub_schurs) PetscFunctionReturn(0);
1978*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(sub_schurs->prefix));
1979*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&sub_schurs->A));
1980*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&sub_schurs->S));
1981*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&sub_schurs->is_I));
1982*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&sub_schurs->is_B));
1983*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
1984*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
1985*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&sub_schurs->S_Ej_all));
1986*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_all));
1987*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1988*5f80ce2aSJacob Faibussowitsch   CHKERRQ(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
1989*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&sub_schurs->is_Ej_all));
1990*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&sub_schurs->is_vertices));
1991*5f80ce2aSJacob Faibussowitsch   CHKERRQ(ISDestroy(&sub_schurs->is_dir));
1992*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscBTDestroy(&sub_schurs->is_edge));
199334a97f8cSStefano Zampini   for (i=0;i<sub_schurs->n_subs;i++) {
1994*5f80ce2aSJacob Faibussowitsch     CHKERRQ(ISDestroy(&sub_schurs->is_subs[i]));
199534a97f8cSStefano Zampini   }
19965ff63025SStefano Zampini   if (sub_schurs->n_subs) {
1997*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PetscFree(sub_schurs->is_subs));
19983dc780c3SStefano Zampini   }
1999df4d28bfSStefano Zampini   if (sub_schurs->reuse_solver) {
2000*5f80ce2aSJacob Faibussowitsch     CHKERRQ(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
2001d62866d3SStefano Zampini   }
2002*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(sub_schurs->reuse_solver));
200372b8c272SStefano Zampini   if (sub_schurs->change) {
200472b8c272SStefano Zampini     for (i=0;i<sub_schurs->n_subs;i++) {
2005*5f80ce2aSJacob Faibussowitsch       CHKERRQ(KSPDestroy(&sub_schurs->change[i]));
2006*5f80ce2aSJacob Faibussowitsch       CHKERRQ(ISDestroy(&sub_schurs->change_primal_sub[i]));
200772b8c272SStefano Zampini     }
200872b8c272SStefano Zampini   }
2009*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(sub_schurs->change));
2010*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(sub_schurs->change_primal_sub));
201134a97f8cSStefano Zampini   sub_schurs->n_subs = 0;
201234a97f8cSStefano Zampini   PetscFunctionReturn(0);
201334a97f8cSStefano Zampini }
201434a97f8cSStefano Zampini 
2015aea80f77Sstefano_zampini PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs)
2016aea80f77Sstefano_zampini {
2017aea80f77Sstefano_zampini   PetscFunctionBegin;
2018*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PCBDDCSubSchursReset(*sub_schurs));
2019*5f80ce2aSJacob Faibussowitsch   CHKERRQ(PetscFree(*sub_schurs));
2020aea80f77Sstefano_zampini   PetscFunctionReturn(0);
2021aea80f77Sstefano_zampini }
2022aea80f77Sstefano_zampini 
20239fbee547SJacob Faibussowitsch static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
202434a97f8cSStefano Zampini {
202534a97f8cSStefano Zampini   PetscInt       i,j,n;
202634a97f8cSStefano Zampini 
202734a97f8cSStefano Zampini   PetscFunctionBegin;
202834a97f8cSStefano Zampini   n = 0;
202934a97f8cSStefano Zampini   for (i=-n_prev;i<0;i++) {
203034a97f8cSStefano Zampini     PetscInt start_dof = queue_tip[i];
203134a97f8cSStefano Zampini     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
203234a97f8cSStefano Zampini       PetscInt dof = adjncy[j];
203334a97f8cSStefano Zampini       if (!PetscBTLookup(touched,dof)) {
2034*5f80ce2aSJacob Faibussowitsch         CHKERRQ(PetscBTSet(touched,dof));
203534a97f8cSStefano Zampini         queue_tip[n] = dof;
203634a97f8cSStefano Zampini         n++;
203734a97f8cSStefano Zampini       }
203834a97f8cSStefano Zampini     }
203934a97f8cSStefano Zampini   }
204034a97f8cSStefano Zampini   *n_added = n;
204134a97f8cSStefano Zampini   PetscFunctionReturn(0);
204234a97f8cSStefano Zampini }
2043