xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision a13376635a6fe97d5335c9292c43fbda3df31874)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 
4 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
5 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, Mat *S);
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "PCBDDCComputeExplicitSchur"
9 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, Mat *S)
10 {
11   Mat            B, C, D, Bd, Cd, AinvBd;
12   KSP            ksp;
13   PC             pc;
14   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
15   PetscReal      fill = 2.0;
16   PetscMPIInt    size;
17   PetscErrorCode ierr;
18 
19   PetscFunctionBegin;
20   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);CHKERRQ(ierr);
21   if (size != 1) {
22     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
23   }
24   ierr = MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);CHKERRQ(ierr);
25   ierr = MatSchurComplementGetKSP(M, &ksp);CHKERRQ(ierr);
26   ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr);
27   ierr = PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);CHKERRQ(ierr);
28   ierr = PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);CHKERRQ(ierr);
29   ierr = PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);CHKERRQ(ierr);
30   ierr = PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);CHKERRQ(ierr);
31   ierr = PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);CHKERRQ(ierr);
32   if (!Bdense) {
33     ierr = MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);CHKERRQ(ierr);
34   } else {
35     Bd = B;
36   }
37 
38   if (isLU || isILU || isCHOL) {
39     Mat fact;
40 
41     ierr = KSPSetUp(ksp);CHKERRQ(ierr);
42     ierr = PCFactorGetMatrix(pc, &fact);CHKERRQ(ierr);
43     ierr = MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);CHKERRQ(ierr);
44     ierr = MatMatSolve(fact, Bd, AinvBd);CHKERRQ(ierr);
45   } else {
46     Mat Ainvd;
47 
48     ierr = PCComputeExplicitOperator(pc, &Ainvd);CHKERRQ(ierr);
49     ierr = MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);CHKERRQ(ierr);
50     ierr = MatDestroy(&Ainvd);CHKERRQ(ierr);
51   }
52   if (!Bdense) {
53     ierr = MatDestroy(&Bd);CHKERRQ(ierr);
54   }
55   if (!Cdense) {
56     ierr = MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);CHKERRQ(ierr);
57   } else {
58     Cd = C;
59   }
60 
61   ierr = MatMatMult(Cd, AinvBd, MAT_INITIAL_MATRIX, fill, S);CHKERRQ(ierr);
62   ierr = MatDestroy(&AinvBd);CHKERRQ(ierr);
63   if (!Cdense) {
64     ierr = MatDestroy(&Cd);CHKERRQ(ierr);
65   }
66 
67   if (D) {
68     Mat       Dd;
69     PetscBool Ddense;
70 
71     ierr = PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);CHKERRQ(ierr);
72     if (!Ddense) {
73       ierr = MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);CHKERRQ(ierr);
74     } else {
75       Dd = D;
76     }
77     ierr = MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
78     if (!Ddense) {
79       ierr = MatDestroy(&Dd);CHKERRQ(ierr);
80     }
81   } else {
82     ierr = MatScale(*S,-1.0);CHKERRQ(ierr);
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNCT__
88 #define __FUNCT__ "PCBDDCSubSchursSetUp"
89 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers)
90 {
91   Mat                    A_II,A_IB,A_BI,A_BB;
92   Mat                    AE_II,*AE_IE,*AE_EI,*AE_EE;
93   Mat                    S_all,global_schur_subsets,work_mat;
94   Mat                    S_Ej_tilda_all;
95   ISLocalToGlobalMapping l2gmap_subsets;
96   IS                     is_I,*is_subset_B,temp_is;
97   PetscInt               *nnz,*all_local_idx_G,*all_local_idx_B,*all_local_idx_N;
98   PetscInt               i,subset_size,max_subset_size;
99   PetscInt               extra,local_size,global_size;
100   PetscBool              compute_Stilda=PETSC_FALSE;
101   PetscErrorCode         ierr;
102 
103   PetscFunctionBegin;
104   /* allocate space for schur complements */
105   ierr = PetscMalloc5(sub_schurs->n_subs,&sub_schurs->is_AEj_B,
106                       sub_schurs->n_subs,&sub_schurs->S_Ej,
107                       sub_schurs->n_subs,&sub_schurs->S_Ej_tilda,
108                       sub_schurs->n_subs,&sub_schurs->work1,
109                       sub_schurs->n_subs,&sub_schurs->work2);CHKERRQ(ierr);
110 
111   /* get Schur complement matrices */
112   if (!sub_schurs->use_mumps) {
113     ierr = MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&A_IB,&A_BI,&A_BB);CHKERRQ(ierr);
114     ierr = PetscMalloc4(sub_schurs->n_subs,&is_subset_B,
115                         sub_schurs->n_subs,&AE_IE,
116                         sub_schurs->n_subs,&AE_EI,
117                         sub_schurs->n_subs,&AE_EE);CHKERRQ(ierr);
118   }
119 
120   /* determine interior problems */
121   if (nlayers >= 0 && xadj != NULL && adjncy != NULL) { /* Interior problems can be different from the original one */
122     PetscBT                touched;
123     const PetscInt*        idx_B;
124     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
125 
126     /* get sizes */
127     ierr = ISGetLocalSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
128     ierr = ISGetLocalSize(sub_schurs->is_B,&n_B);CHKERRQ(ierr);
129 
130     ierr = PetscMalloc1(n_I+n_B,&local_numbering);CHKERRQ(ierr);
131     ierr = PetscBTCreate(n_I+n_B,&touched);CHKERRQ(ierr);
132     ierr = PetscBTMemzero(n_I+n_B,touched);CHKERRQ(ierr);
133 
134     /* all boundary dofs must be skipped when adding layers */
135     ierr = ISGetIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
136     for (j=0;j<n_B;j++) {
137       ierr = PetscBTSet(touched,idx_B[j]);CHKERRQ(ierr);
138     }
139     ierr = PetscMemcpy(local_numbering,idx_B,n_B*sizeof(PetscInt));CHKERRQ(ierr);
140     ierr = ISRestoreIndices(sub_schurs->is_B,&idx_B);CHKERRQ(ierr);
141 
142     /* add prescribed number of layers of dofs */
143     n_local_dofs = n_B;
144     n_prev_added = n_B;
145     for (layer=0;layer<nlayers;layer++) {
146       PetscInt n_added;
147       if (n_local_dofs == n_I+n_B) break;
148       if (n_local_dofs > n_I+n_B) {
149         SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %d. Out of bound access (%d > %d)",layer,n_local_dofs,n_I+n_B);
150       }
151       ierr = PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);CHKERRQ(ierr);
152       n_prev_added = n_added;
153       n_local_dofs += n_added;
154       if (!n_added) break;
155     }
156     ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
157 
158     /* IS for I layer dofs in original numbering */
159     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&sub_schurs->is_I_layer);CHKERRQ(ierr);
160     ierr = PetscFree(local_numbering);CHKERRQ(ierr);
161     ierr = ISSort(sub_schurs->is_I_layer);CHKERRQ(ierr);
162     /* IS for I layer dofs in I numbering */
163     if (!sub_schurs->use_mumps) {
164       ISLocalToGlobalMapping ItoNmap;
165       ierr = ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);CHKERRQ(ierr);
166       ierr = ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,sub_schurs->is_I_layer,&is_I);CHKERRQ(ierr);
167       ierr = ISLocalToGlobalMappingDestroy(&ItoNmap);CHKERRQ(ierr);
168 
169       /* II block */
170       ierr = MatGetSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);CHKERRQ(ierr);
171     }
172   } else {
173     PetscInt n_I;
174 
175     /* IS for I dofs in original numbering */
176     ierr = PetscObjectReference((PetscObject)sub_schurs->is_I);CHKERRQ(ierr);
177     sub_schurs->is_I_layer = sub_schurs->is_I;
178 
179     /* IS for I dofs in I numbering (strided 1) */
180     if (!sub_schurs->use_mumps) {
181       ierr = ISGetSize(sub_schurs->is_I,&n_I);CHKERRQ(ierr);
182       ierr = ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);CHKERRQ(ierr);
183 
184       /* II block is the same */
185       ierr = PetscObjectReference((PetscObject)A_II);CHKERRQ(ierr);
186       AE_II = A_II;
187     }
188   }
189 
190   for (i=0;i<sub_schurs->n_subs;i++) {
191     ierr = ISDuplicate(sub_schurs->is_subs[i],&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr);
192     ierr = ISSort(sub_schurs->is_AEj_B[i]);CHKERRQ(ierr);
193   }
194 
195   /* Get info on subset sizes and sum of all subsets sizes */
196   max_subset_size = 0;
197   local_size = 0;
198   for (i=0;i<sub_schurs->n_subs_seq;i++) {
199     PetscInt j = sub_schurs->index_sequential[i];
200     ierr = ISGetLocalSize(sub_schurs->is_AEj_B[j],&subset_size);CHKERRQ(ierr);
201     max_subset_size = PetscMax(subset_size,max_subset_size);
202     local_size += subset_size;
203   }
204 
205   /* Work arrays for local indices */
206   ierr = PetscMalloc1(local_size,&all_local_idx_B);CHKERRQ(ierr);
207   extra = 0;
208   if (sub_schurs->use_mumps) {
209     ierr = ISGetLocalSize(sub_schurs->is_I_layer,&extra);CHKERRQ(ierr);
210   }
211   ierr = PetscMalloc1(local_size+extra,&all_local_idx_N);CHKERRQ(ierr);
212   if (extra) {
213     const PetscInt *idxs;
214     ierr = ISGetIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr);
215     ierr = PetscMemcpy(all_local_idx_N,idxs,extra*sizeof(PetscInt));CHKERRQ(ierr);
216     ierr = ISRestoreIndices(sub_schurs->is_I_layer,&idxs);CHKERRQ(ierr);
217   }
218   ierr = PetscMalloc1(local_size,&nnz);CHKERRQ(ierr);
219 
220   /* Get local indices in local numbering */
221   local_size = 0;
222   for (i=0;i<sub_schurs->n_subs_seq;i++) {
223     PetscInt j;
224     const    PetscInt *idxs;
225 
226     PetscInt local_problem_index = sub_schurs->index_sequential[i];
227     ierr = ISGetLocalSize(sub_schurs->is_AEj_B[local_problem_index],&subset_size);CHKERRQ(ierr);
228     ierr = ISGetIndices(sub_schurs->is_AEj_B[local_problem_index],&idxs);CHKERRQ(ierr);
229     /* subset indices in local numbering */
230     ierr = PetscMemcpy(all_local_idx_N+local_size+extra,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
231     ierr = ISRestoreIndices(sub_schurs->is_AEj_B[local_problem_index],&idxs);CHKERRQ(ierr);
232     for (j=0;j<subset_size;j++) nnz[local_size+j] = subset_size;
233     local_size += subset_size;
234   }
235 
236   S_all = NULL;
237   if (!sub_schurs->use_mumps) {
238     /* subsets in original and boundary numbering */
239     for (i=0;i<sub_schurs->n_subs;i++) {
240       ierr = ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_AEj_B[i],&is_subset_B[i]);CHKERRQ(ierr);
241     }
242 
243     /* EE block */
244     for (i=0;i<sub_schurs->n_subs;i++) {
245       ierr = MatGetSubMatrix(A_BB,is_subset_B[i],is_subset_B[i],MAT_INITIAL_MATRIX,&AE_EE[i]);CHKERRQ(ierr);
246     }
247     /* IE block */
248     for (i=0;i<sub_schurs->n_subs;i++) {
249       ierr = MatGetSubMatrix(A_IB,is_I,is_subset_B[i],MAT_INITIAL_MATRIX,&AE_IE[i]);CHKERRQ(ierr);
250     }
251     /* EI block */
252     for (i=0;i<sub_schurs->n_subs;i++) {
253       ierr = MatGetSubMatrix(A_BI,is_subset_B[i],is_I,MAT_INITIAL_MATRIX,&AE_EI[i]);CHKERRQ(ierr);
254     }
255 
256     /* setup Schur complements on subset */
257     for (i=0;i<sub_schurs->n_subs;i++) {
258       ierr = MatCreateSchurComplement(AE_II,AE_II,AE_IE[i],AE_EI[i],AE_EE[i],&sub_schurs->S_Ej[i]);CHKERRQ(ierr);
259       if (AE_II == A_II) { /* we can reuse the same ksp */
260         KSP ksp;
261         ierr = MatSchurComplementGetKSP(sub_schurs->S,&ksp);CHKERRQ(ierr);
262         ierr = MatSchurComplementSetKSP(sub_schurs->S_Ej[i],ksp);CHKERRQ(ierr);
263       } else { /* build new ksp object which inherits ksp and pc types from the original one */
264         KSP      origksp,schurksp;
265         PC       origpc,schurpc;
266         KSPType  ksp_type;
267         PCType   pc_type;
268         PetscInt n_internal;
269 
270         ierr = MatSchurComplementGetKSP(sub_schurs->S,&origksp);CHKERRQ(ierr);
271         ierr = MatSchurComplementGetKSP(sub_schurs->S_Ej[i],&schurksp);CHKERRQ(ierr);
272         ierr = KSPGetType(origksp,&ksp_type);CHKERRQ(ierr);
273         ierr = KSPSetType(schurksp,ksp_type);CHKERRQ(ierr);
274         ierr = KSPGetPC(schurksp,&schurpc);CHKERRQ(ierr);
275         ierr = KSPGetPC(origksp,&origpc);CHKERRQ(ierr);
276         ierr = PCGetType(origpc,&pc_type);CHKERRQ(ierr);
277         ierr = PCSetType(schurpc,pc_type);CHKERRQ(ierr);
278         ierr = ISGetSize(is_I,&n_internal);CHKERRQ(ierr);
279         if (n_internal) { /* UMFPACK gives error with 0 sized problems */
280           MatSolverPackage solver=NULL;
281           ierr = PCFactorGetMatSolverPackage(origpc,(const MatSolverPackage*)&solver);CHKERRQ(ierr);
282           if (solver) {
283             ierr = PCFactorSetMatSolverPackage(schurpc,solver);CHKERRQ(ierr);
284           }
285         }
286         ierr = KSPSetUp(schurksp);CHKERRQ(ierr);
287       }
288     }
289     /* free */
290     ierr = ISDestroy(&is_I);CHKERRQ(ierr);
291     ierr = MatDestroy(&AE_II);CHKERRQ(ierr);
292     for (i=0;i<sub_schurs->n_subs;i++) {
293       ierr = MatDestroy(&AE_EE[i]);CHKERRQ(ierr);
294       ierr = MatDestroy(&AE_IE[i]);CHKERRQ(ierr);
295       ierr = MatDestroy(&AE_EI[i]);CHKERRQ(ierr);
296       ierr = ISDestroy(&is_subset_B[i]);CHKERRQ(ierr);
297     }
298     ierr = PetscFree4(is_subset_B,AE_IE,AE_EI,AE_EE);CHKERRQ(ierr);
299 #if defined(PETSC_HAVE_MUMPS)
300   } else {
301     Mat           A,F;
302     IS            is_A_all;
303     PetscBool     is_symmetric;
304     PetscInt      *idxs_schur,n_I;
305 
306     /* get working mat */
307     ierr = ISGetLocalSize(sub_schurs->is_I_layer,&n_I);CHKERRQ(ierr);
308     ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size+n_I,all_local_idx_N,PETSC_COPY_VALUES,&is_A_all);CHKERRQ(ierr);
309     ierr = MatGetSubMatrixUnsorted(sub_schurs->A,is_A_all,is_A_all,&A);CHKERRQ(ierr);
310     ierr = ISDestroy(&is_A_all);CHKERRQ(ierr);
311 
312     ierr = MatIsSymmetric(sub_schurs->A,0.0,&is_symmetric);CHKERRQ(ierr);
313     if (is_symmetric) {
314       ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr);
315     } else {
316       ierr = MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
317     }
318 
319     /* subsets ordered last */
320     ierr = PetscMalloc1(local_size,&idxs_schur);CHKERRQ(ierr);
321     for (i=0;i<local_size;i++) {
322       idxs_schur[i] = n_I+i+1;
323     }
324     ierr = MatMumpsSetSchurIndices(F,local_size,idxs_schur);CHKERRQ(ierr);
325     ierr = PetscFree(idxs_schur);CHKERRQ(ierr);
326 
327     /* factorization step */
328     if (is_symmetric) {
329       ierr = MatCholeskyFactorSymbolic(F,A,NULL,NULL);CHKERRQ(ierr);
330       ierr = MatCholeskyFactorNumeric(F,A,NULL);CHKERRQ(ierr);
331     } else {
332       ierr = MatLUFactorSymbolic(F,A,NULL,NULL,NULL);CHKERRQ(ierr);
333       ierr = MatLUFactorNumeric(F,A,NULL);CHKERRQ(ierr);
334     }
335 
336     /* get explicit Schur Complement computed during numeric factorization */
337     ierr = MatMumpsGetSchurComplement(F,&S_all);CHKERRQ(ierr);
338 
339     /* free workspace */
340     ierr = MatDestroy(&A);CHKERRQ(ierr);
341     ierr = MatDestroy(&F);CHKERRQ(ierr);
342 
343 #endif
344   }
345 
346   /* TODO: just for compatibility with the previous version, needs to be fixed */
347   for (i=0;i<sub_schurs->n_subs_par;i++) {
348     PetscInt j = sub_schurs->index_parallel[i];
349     ierr = MatCreateVecs(sub_schurs->S_Ej[j],&sub_schurs->work1[j],&sub_schurs->work2[j]);CHKERRQ(ierr);
350   }
351   for (i=0;i<sub_schurs->n_subs_seq;i++) {
352      sub_schurs->work1[sub_schurs->index_sequential[i]] = 0;
353      sub_schurs->work2[sub_schurs->index_sequential[i]] = 0;
354   }
355 
356   /* Stildas are not computed without mumps */
357   for (i=0;i<sub_schurs->n_subs;i++) {
358     sub_schurs->S_Ej_tilda[i] = 0;
359   }
360   if (!sub_schurs->n_subs_seq_g) {
361     sub_schurs->S_Ej_all = 0;
362     sub_schurs->sum_S_Ej_all = 0;
363     sub_schurs->is_Ej_all = 0;
364     PetscFunctionReturn(0);
365   }
366 
367   /* subset indices in local boundary numbering */
368   ierr = ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);CHKERRQ(ierr);
369   if (subset_size != local_size) {
370     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %d != %d\n",subset_size,local_size);
371   }
372   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);CHKERRQ(ierr);
373 
374   /* Local matrix of all local Schur on subsets transposed */
375   ierr = MatCreate(PETSC_COMM_SELF,&sub_schurs->S_Ej_all);CHKERRQ(ierr);
376   ierr = MatSetSizes(sub_schurs->S_Ej_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
377   ierr = MatSetType(sub_schurs->S_Ej_all,MATAIJ);CHKERRQ(ierr);
378   ierr = MatSeqAIJSetPreallocation(sub_schurs->S_Ej_all,0,nnz);CHKERRQ(ierr);
379 
380   sub_schurs->sum_S_Ej_tilda_all = 0;
381   S_Ej_tilda_all = 0;
382 
383   if (!sub_schurs->use_mumps) {
384     PetscScalar *fill_vals;
385     PetscInt    *dummy_idx;
386 
387     /* Work arrays */
388     ierr = PetscMalloc1(max_subset_size,&dummy_idx);CHKERRQ(ierr);
389 
390     /* Loop on local problems end compute Schur complements explicitly */
391     local_size = 0;
392     for (i=0;i<sub_schurs->n_subs_seq;i++) {
393       Mat       S_Ej_expl;
394       PetscInt  j,lpi = sub_schurs->index_sequential[i];
395       PetscBool Sdense;
396 
397       ierr = PCBDDCComputeExplicitSchur(sub_schurs->S_Ej[lpi],&S_Ej_expl);CHKERRQ(ierr);
398       ierr = ISGetLocalSize(sub_schurs->is_AEj_B[lpi],&subset_size);CHKERRQ(ierr);
399       ierr = PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);CHKERRQ(ierr);
400       if (Sdense) {
401         for (j=0;j<subset_size;j++) {
402           dummy_idx[j]=local_size+j;
403         }
404         ierr = MatDenseGetArray(S_Ej_expl,&fill_vals);CHKERRQ(ierr);
405         ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,fill_vals,INSERT_VALUES);CHKERRQ(ierr);
406         ierr = MatDenseRestoreArray(S_Ej_expl,&fill_vals);CHKERRQ(ierr);
407       } else {
408         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
409       }
410       local_size += subset_size;
411       ierr = MatDestroy(&sub_schurs->S_Ej[lpi]);CHKERRQ(ierr);
412       sub_schurs->S_Ej[lpi] = S_Ej_expl;
413     }
414     /* Stildas are not computed without mumps */
415     for (i=0;i<sub_schurs->n_subs;i++) {
416       sub_schurs->S_Ej_tilda[i] = 0;
417     }
418     ierr = PetscFree(dummy_idx);CHKERRQ(ierr);
419   } else {
420     PetscInt  *dummy_idx,n_all;
421     PetscBool is_symmetric;
422 
423     compute_Stilda=PETSC_TRUE;
424     if (compute_Stilda) {
425       ierr = MatCreate(PETSC_COMM_SELF,&S_Ej_tilda_all);CHKERRQ(ierr);
426       ierr = MatSetSizes(S_Ej_tilda_all,PETSC_DECIDE,PETSC_DECIDE,local_size,local_size);CHKERRQ(ierr);
427       ierr = MatSetType(S_Ej_tilda_all,MATAIJ);CHKERRQ(ierr);
428       ierr = MatSeqAIJSetPreallocation(S_Ej_tilda_all,0,nnz);CHKERRQ(ierr);
429     }
430     /* Work arrays */
431     ierr = PetscMalloc1(max_subset_size,&dummy_idx);CHKERRQ(ierr);
432 
433     /* Work arrays */
434     ierr = MatGetSize(S_all,&n_all,NULL);CHKERRQ(ierr);
435     local_size = 0;
436     ierr = MatIsSymmetric(sub_schurs->A,0.0,&is_symmetric);CHKERRQ(ierr);
437     for (i=0;i<sub_schurs->n_subs;i++) {
438       IS  is_E;
439       PetscScalar *vals;
440       PetscInt j;
441 
442       ierr = ISGetLocalSize(sub_schurs->is_AEj_B[i],&subset_size);CHKERRQ(ierr);
443       for (j=0;j<subset_size;j++) {
444         dummy_idx[j]=local_size+j;
445       }
446       ierr = ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),subset_size,local_size,1,&is_E);CHKERRQ(ierr);
447       ierr = MatGetSubMatrix(S_all,is_E,is_E,MAT_INITIAL_MATRIX,&sub_schurs->S_Ej[i]);CHKERRQ(ierr);
448       ierr = MatDenseGetArray(sub_schurs->S_Ej[i],&vals);CHKERRQ(ierr);
449       ierr = MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,vals,INSERT_VALUES);CHKERRQ(ierr);
450       ierr = MatDenseRestoreArray(sub_schurs->S_Ej[i],&vals);CHKERRQ(ierr);
451 
452       if (compute_Stilda) {
453         Mat Stilda;
454         if (sub_schurs->n_subs == 1) {
455           ierr = PetscObjectReference((PetscObject)sub_schurs->S_Ej[i]);CHKERRQ(ierr);
456           Stilda = sub_schurs->S_Ej[i];
457         } else {
458           KSP ksp;
459           PC  pc;
460           Mat S_EF,S_FE,S_FF,Stilda_impl;
461           IS  is_F;
462           PetscScalar eps=1.e-8;
463           PetscBool chop=PETSC_FALSE;
464 
465           ierr = ISComplement(is_E,0,n_all,&is_F);CHKERRQ(ierr);
466           ierr = MatGetSubMatrix(S_all,is_E,is_F,MAT_INITIAL_MATRIX,&S_EF);CHKERRQ(ierr);
467           ierr = MatGetSubMatrix(S_all,is_F,is_F,MAT_INITIAL_MATRIX,&S_FF);CHKERRQ(ierr);
468           ierr = MatGetSubMatrix(S_all,is_F,is_E,MAT_INITIAL_MATRIX,&S_FE);CHKERRQ(ierr);
469           ierr = ISDestroy(&is_F);CHKERRQ(ierr);
470           if (chop) {
471             ierr = MatChop(S_FF,eps);CHKERRQ(ierr);
472             ierr = MatConvert(S_FF,MATAIJ,MAT_REUSE_MATRIX,&S_FF);CHKERRQ(ierr);
473           }
474           ierr = MatCreateSchurComplement(S_FF,S_FF,S_FE,S_EF,sub_schurs->S_Ej[i],&Stilda_impl);CHKERRQ(ierr);
475           ierr = MatDestroy(&S_FF);CHKERRQ(ierr);
476           ierr = MatDestroy(&S_FE);CHKERRQ(ierr);
477           ierr = MatDestroy(&S_EF);CHKERRQ(ierr);
478           ierr = MatSchurComplementGetKSP(Stilda_impl,&ksp);CHKERRQ(ierr);
479           ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
480           ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
481           if (is_symmetric) {
482             ierr = PCSetType(pc,PCCHOLESKY);CHKERRQ(ierr);
483           } else {
484             ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
485           }
486           if (!chop) {
487             ierr = PCFactorSetUseInPlace(pc,PETSC_TRUE);CHKERRQ(ierr);
488           } else {
489             ierr = PCFactorSetMatSolverPackage(pc,MATSOLVERMUMPS);CHKERRQ(ierr);
490           }
491           ierr = KSPSetUp(ksp);CHKERRQ(ierr);
492           ierr = PCBDDCComputeExplicitSchur(Stilda_impl,&Stilda);CHKERRQ(ierr);
493           ierr = MatDestroy(&Stilda_impl);CHKERRQ(ierr);
494         }
495 /*
496         PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);
497         ierr = MatView(Stilda,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
498         PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);
499 */
500         sub_schurs->S_Ej_tilda[i]= Stilda;
501         ierr = MatDenseGetArray(sub_schurs->S_Ej_tilda[i],&vals);CHKERRQ(ierr);
502         ierr = MatSetValues(S_Ej_tilda_all,subset_size,dummy_idx,subset_size,dummy_idx,vals,INSERT_VALUES);CHKERRQ(ierr);
503         ierr = MatDenseRestoreArray(sub_schurs->S_Ej_tilda[i],&vals);CHKERRQ(ierr);
504       } else {
505         sub_schurs->S_Ej_tilda[i] = 0;
506       }
507       ierr = ISDestroy(&is_E);CHKERRQ(ierr);
508       local_size += subset_size;
509     }
510     ierr = PetscFree(dummy_idx);CHKERRQ(ierr);
511   }
512   ierr = PetscFree(nnz);CHKERRQ(ierr);
513   ierr = MatDestroy(&S_all);CHKERRQ(ierr);
514   ierr = MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
515   ierr = MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
516   if (compute_Stilda) {
517     ierr = MatAssemblyBegin(S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
518     ierr = MatAssemblyEnd(S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
519   }
520 
521   /* Global matrix of all assembled Schur on subsets */
522   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->l2gmap,local_size,all_local_idx_N+extra,PETSC_NULL,&global_size,&all_local_idx_G);CHKERRQ(ierr);
523   ierr = ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),1,local_size,all_local_idx_G,PETSC_COPY_VALUES,&l2gmap_subsets);CHKERRQ(ierr);
524   ierr = MatCreateIS(PetscObjectComm((PetscObject)sub_schurs->l2gmap),1,PETSC_DECIDE,PETSC_DECIDE,global_size,global_size,l2gmap_subsets,&work_mat);CHKERRQ(ierr);
525   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_subsets);CHKERRQ(ierr);
526   ierr = MatISSetLocalMat(work_mat,sub_schurs->S_Ej_all);CHKERRQ(ierr);
527   ierr = MatISGetMPIXAIJ(work_mat,MAT_INITIAL_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
528   /* Get local part of (\sum_j S_Ej) */
529   ierr = PetscFree(all_local_idx_N);CHKERRQ(ierr);
530   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->l2gmap),local_size,all_local_idx_G,PETSC_OWN_POINTER,&temp_is);CHKERRQ(ierr);
531   ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
532 
533   if (compute_Stilda) {
534     ierr = MatISSetLocalMat(work_mat,S_Ej_tilda_all);CHKERRQ(ierr);
535     ierr = MatISGetMPIXAIJ(work_mat,MAT_REUSE_MATRIX,&global_schur_subsets);CHKERRQ(ierr);
536     ierr = MatGetSubMatrixUnsorted(global_schur_subsets,temp_is,temp_is,&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
537   }
538 
539   ierr = MatDestroy(&global_schur_subsets);CHKERRQ(ierr);
540   ierr = MatDestroy(&S_Ej_tilda_all);CHKERRQ(ierr);
541   ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
542   ierr = ISDestroy(&temp_is);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "PCBDDCSubSchursInit"
548 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, Mat A, Mat S, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscInt seqthreshold)
549 {
550   IS                  *faces,*edges,*all_cc;
551   PetscInt            *index_sequential,*index_parallel;
552   PetscInt            *auxlocal_sequential,*auxlocal_parallel;
553   PetscInt            *auxglobal_sequential,*auxglobal_parallel;
554   PetscInt            *auxmapping;//,*idxs;
555   PetscInt            i,max_subset_size;
556   PetscInt            n_sequential_problems,n_local_sequential_problems,n_parallel_problems,n_local_parallel_problems;
557   PetscInt            n_faces,n_edges,n_all_cc;
558   PetscBool           is_sorted;
559   PetscErrorCode  ierr;
560 
561   PetscFunctionBegin;
562   ierr = ISSorted(is_I,&is_sorted);CHKERRQ(ierr);
563   if (!is_sorted) {
564     SETERRQ(PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
565   }
566   ierr = ISSorted(is_B,&is_sorted);CHKERRQ(ierr);
567   if (!is_sorted) {
568     SETERRQ(PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
569   }
570 
571   /* reset any previous data */
572   ierr = PCBDDCSubSchursReset(sub_schurs);CHKERRQ(ierr);
573 
574   /* get index sets for faces and edges */
575   ierr = PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,NULL);CHKERRQ(ierr);
576   n_all_cc = n_faces+n_edges;
577   ierr = PetscMalloc1(n_all_cc,&all_cc);CHKERRQ(ierr);
578   for (i=0;i<n_faces;i++) {
579     all_cc[i] = faces[i];
580   }
581   for (i=0;i<n_edges;i++) {
582     all_cc[n_faces+i] = edges[i];
583   }
584   ierr = PetscFree(faces);CHKERRQ(ierr);
585   ierr = PetscFree(edges);CHKERRQ(ierr);
586 
587   /* map interface's subsets */
588   max_subset_size = 0;
589   for (i=0;i<n_all_cc;i++) {
590     PetscInt subset_size;
591     ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr);
592     max_subset_size = PetscMax(max_subset_size,subset_size);
593   }
594   ierr = PetscMalloc1(max_subset_size,&auxmapping);CHKERRQ(ierr);
595   ierr = PetscMalloc2(graph->ncc,&auxlocal_sequential,
596                       graph->ncc,&auxlocal_parallel);CHKERRQ(ierr);
597   ierr = PetscMalloc2(graph->ncc,&index_sequential,
598                       graph->ncc,&index_parallel);CHKERRQ(ierr);
599 
600   /* if threshold is negative uses all sequential problems (possibly using MUMPS) */
601   sub_schurs->use_mumps = PETSC_FALSE;
602   if (seqthreshold < 0) {
603     seqthreshold = max_subset_size;
604 #if defined(PETSC_HAVE_MUMPS)
605     sub_schurs->use_mumps = !!A;
606 #endif
607   }
608 
609 
610   /* determine which problem has to be solved in parallel or sequentially */
611   n_local_sequential_problems = 0;
612   n_local_parallel_problems = 0;
613   for (i=0;i<n_all_cc;i++) {
614     PetscInt       subset_size,j,min_loc = 0;
615     const PetscInt *idxs;
616 
617     ierr = ISGetLocalSize(all_cc[i],&subset_size);CHKERRQ(ierr);
618     ierr = ISGetIndices(all_cc[i],&idxs);CHKERRQ(ierr);
619     ierr = ISLocalToGlobalMappingApply(graph->l2gmap,subset_size,idxs,auxmapping);CHKERRQ(ierr);
620     for (j=1;j<subset_size;j++) {
621       if (auxmapping[j]<auxmapping[min_loc]) {
622         min_loc = j;
623       }
624     }
625     if (subset_size > seqthreshold) {
626       index_parallel[n_local_parallel_problems] = i;
627       auxlocal_parallel[n_local_parallel_problems] = idxs[min_loc];
628       n_local_parallel_problems++;
629     } else {
630       index_sequential[n_local_sequential_problems] = i;
631       auxlocal_sequential[n_local_sequential_problems] = idxs[min_loc];
632       n_local_sequential_problems++;
633     }
634     ierr = ISRestoreIndices(all_cc[i],&idxs);CHKERRQ(ierr);
635   }
636 
637   /* Number parallel problems */
638   auxglobal_parallel = 0;
639   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)graph->l2gmap),graph->l2gmap,n_local_parallel_problems,auxlocal_parallel,PETSC_NULL,&n_parallel_problems,&auxglobal_parallel);CHKERRQ(ierr);
640 
641   /* Number sequential problems */
642   auxglobal_sequential = 0;
643   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)graph->l2gmap),graph->l2gmap,n_local_sequential_problems,auxlocal_sequential,PETSC_NULL,&n_sequential_problems,&auxglobal_sequential);CHKERRQ(ierr);
644 
645   /* update info in sub_schurs */
646   if (sub_schurs->use_mumps && A) {
647     ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
648     sub_schurs->A = A;
649   }
650   ierr = PetscObjectReference((PetscObject)S);CHKERRQ(ierr);
651   sub_schurs->S = S;
652   ierr = PetscObjectReference((PetscObject)is_I);CHKERRQ(ierr);
653   sub_schurs->is_I = is_I;
654   ierr = PetscObjectReference((PetscObject)is_B);CHKERRQ(ierr);
655   sub_schurs->is_B = is_B;
656   ierr = PetscObjectReference((PetscObject)graph->l2gmap);CHKERRQ(ierr);
657   sub_schurs->l2gmap = graph->l2gmap;
658   ierr = PetscObjectReference((PetscObject)BtoNmap);CHKERRQ(ierr);
659   sub_schurs->BtoNmap = BtoNmap;
660   sub_schurs->n_subs_seq = n_local_sequential_problems;
661   sub_schurs->n_subs_par = n_local_parallel_problems;
662   sub_schurs->n_subs_seq_g = n_sequential_problems;
663   sub_schurs->n_subs_par_g = n_parallel_problems;
664   sub_schurs->n_subs = sub_schurs->n_subs_seq + sub_schurs->n_subs_par;
665   sub_schurs->is_subs = all_cc;
666   sub_schurs->index_sequential = index_sequential;
667   sub_schurs->index_parallel = index_parallel;
668   sub_schurs->auxglobal_sequential = auxglobal_sequential;
669   sub_schurs->auxglobal_parallel = auxglobal_parallel;
670 
671   /* free workspace */
672   ierr = PetscFree(auxmapping);CHKERRQ(ierr);
673   ierr = PetscFree2(auxlocal_sequential,auxlocal_parallel);CHKERRQ(ierr);
674 
675   PetscFunctionReturn(0);
676 }
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "PCBDDCSubSchursCreate"
680 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
681 {
682   PCBDDCSubSchurs schurs_ctx;
683   PetscErrorCode  ierr;
684 
685   PetscFunctionBegin;
686   ierr = PetscNew(&schurs_ctx);CHKERRQ(ierr);
687   schurs_ctx->n_subs = 0;
688   *sub_schurs = schurs_ctx;
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "PCBDDCSubSchursDestroy"
694 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
695 {
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = PCBDDCSubSchursReset(*sub_schurs);CHKERRQ(ierr);
700   ierr = PetscFree(*sub_schurs);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "PCBDDCSubSchursReset"
706 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
707 {
708   PetscInt       i;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712   ierr = MatDestroy(&sub_schurs->A);CHKERRQ(ierr);
713   ierr = MatDestroy(&sub_schurs->S);CHKERRQ(ierr);
714   ierr = ISDestroy(&sub_schurs->is_I);CHKERRQ(ierr);
715   ierr = ISDestroy(&sub_schurs->is_B);CHKERRQ(ierr);
716   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);CHKERRQ(ierr);
717   ierr = ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);CHKERRQ(ierr);
718   ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
719   ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
720   ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
721   ierr = ISDestroy(&sub_schurs->is_Ej_all);CHKERRQ(ierr);
722   for (i=0;i<sub_schurs->n_subs;i++) {
723     ierr = ISDestroy(&sub_schurs->is_subs[i]);CHKERRQ(ierr);
724     ierr = ISDestroy(&sub_schurs->is_AEj_B[i]);CHKERRQ(ierr);
725     ierr = MatDestroy(&sub_schurs->S_Ej[i]);CHKERRQ(ierr);
726     ierr = MatDestroy(&sub_schurs->S_Ej_tilda[i]);CHKERRQ(ierr);
727     ierr = VecDestroy(&sub_schurs->work1[i]);CHKERRQ(ierr);
728     ierr = VecDestroy(&sub_schurs->work2[i]);CHKERRQ(ierr);
729   }
730   ierr = ISDestroy(&sub_schurs->is_I_layer);CHKERRQ(ierr);
731   if (sub_schurs->n_subs) {
732     ierr = PetscFree(sub_schurs->is_subs);CHKERRQ(ierr);
733     ierr = PetscFree5(sub_schurs->is_AEj_B,sub_schurs->S_Ej,sub_schurs->S_Ej_tilda,sub_schurs->work1,sub_schurs->work2);CHKERRQ(ierr);
734     ierr = PetscFree2(sub_schurs->index_sequential,sub_schurs->index_parallel);CHKERRQ(ierr);
735     ierr = PetscFree(sub_schurs->auxglobal_sequential);CHKERRQ(ierr);
736     ierr = PetscFree(sub_schurs->auxglobal_parallel);CHKERRQ(ierr);
737   }
738   sub_schurs->n_subs = 0;
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "PCBDDCAdjGetNextLayer_Private"
744 PETSC_STATIC_INLINE PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
745 {
746   PetscInt       i,j,n;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750   n = 0;
751   for (i=-n_prev;i<0;i++) {
752     PetscInt start_dof = queue_tip[i];
753     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
754       PetscInt dof = adjncy[j];
755       if (!PetscBTLookup(touched,dof)) {
756         ierr = PetscBTSet(touched,dof);CHKERRQ(ierr);
757         queue_tip[n] = dof;
758         n++;
759       }
760     }
761   }
762   *n_added = n;
763   PetscFunctionReturn(0);
764 }
765