xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 3458826566740d380a0f01335e326039948fd12f)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/ksp/pc/impls/bddc/bddc.h>
3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
8 
9 /* TODO: add reuse flag */
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqAIJCompress"
12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
13 {
14   Mat            Bt;
15   PetscScalar    *a,*bdata;
16   const PetscInt *ii,*ij;
17   PetscInt       m,n,i,nnz,*bii,*bij;
18   PetscBool      flg_row;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr);
23   ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
24   ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr);
25   nnz = n;
26   for (i=0;i<ii[n];i++) {
27     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
28   }
29   ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr);
30   ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr);
31   ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr);
32   nnz = 0;
33   bii[0] = 0;
34   for (i=0;i<n;i++) {
35     PetscInt j;
36     for (j=ii[i];j<ii[i+1];j++) {
37       PetscScalar entry = a[j];
38       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) {
39         bij[nnz] = ij[j];
40         bdata[nnz] = entry;
41         nnz++;
42       }
43     }
44     bii[i+1] = nnz;
45   }
46   ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr);
47   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr);
48   ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
49   {
50     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
51     b->free_a = PETSC_TRUE;
52     b->free_ij = PETSC_TRUE;
53   }
54   *B = Bt;
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDetectDisconnectedComponents"
60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[])
61 {
62   Mat                    B;
63   IS                     is_dummy,*cc_n;
64   ISLocalToGlobalMapping l2gmap_dummy;
65   PCBDDCGraph            graph;
66   PetscInt               i,n;
67   PetscInt               *xadj,*adjncy;
68   PetscInt               *xadj_filtered,*adjncy_filtered;
69   PetscBool              flg_row,isseqaij;
70   PetscErrorCode         ierr;
71 
72   PetscFunctionBegin;
73   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
74   if (!isseqaij && filter) {
75     PetscBool isseqdense;
76 
77     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr);
78     if (!isseqdense) {
79       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
80     } else { /* TODO: rectangular case and LDA */
81       PetscScalar *array;
82       PetscReal   chop=1.e-6;
83 
84       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
85       ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr);
86       ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr);
87       for (i=0;i<n;i++) {
88         PetscInt j;
89         for (j=i+1;j<n;j++) {
90           PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
91           if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
92           if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
93         }
94       }
95       ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr);
96       ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
97     }
98   } else {
99     B = A;
100   }
101   ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
102 
103   /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
104   if (filter) {
105     PetscScalar *data;
106     PetscInt    j,cum;
107 
108     ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr);
109     ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr);
110     cum = 0;
111     for (i=0;i<n;i++) {
112       PetscInt t;
113 
114       for (j=xadj[i];j<xadj[i+1];j++) {
115         if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
116           continue;
117         }
118         adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
119       }
120       t = xadj_filtered[i];
121       xadj_filtered[i] = cum;
122       cum += t;
123     }
124     ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr);
125   } else {
126     xadj_filtered = NULL;
127     adjncy_filtered = NULL;
128   }
129 
130   /* compute local connected components using PCBDDCGraph */
131   ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr);
132   ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
133   ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
134   ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
135   ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr);
136   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
137   if (xadj_filtered) {
138     graph->xadj = xadj_filtered;
139     graph->adjncy = adjncy_filtered;
140   } else {
141     graph->xadj = xadj;
142     graph->adjncy = adjncy;
143   }
144   ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
145   ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
146   /* partial clean up */
147   ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr);
148   ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
149   if (A != B) {
150     ierr = MatDestroy(&B);CHKERRQ(ierr);
151   }
152 
153   /* get back data */
154   if (ncc) *ncc = graph->ncc;
155   if (cc) {
156     ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr);
157     for (i=0;i<graph->ncc;i++) {
158       ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr);
159     }
160     *cc = cc_n;
161   }
162   /* clean up graph */
163   graph->xadj = 0;
164   graph->adjncy = 0;
165   ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "PCBDDCBenignCheck"
171 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
172 {
173   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
174   PC_IS*         pcis = (PC_IS*)(pc->data);
175   IS             dirIS = NULL;
176   PetscInt       i;
177   PetscErrorCode ierr;
178 
179   PetscFunctionBegin;
180   ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
181   if (zerodiag) {
182     Mat            A;
183     Vec            vec3_N;
184     PetscScalar    *vals;
185     const PetscInt *idxs;
186     PetscInt       nz;
187 
188     /* p0 */
189     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
190     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
191     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
192     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
193     for (i=0;i<nz;i++) vals[i] = 1.;
194     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
195     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
196     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
197     /* v_I */
198     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
199     for (i=0;i<nz;i++) vals[i] = 0.;
200     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
201     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
202     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
203     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
204     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
205     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
206     if (dirIS) {
207       PetscInt n;
208 
209       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
210       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
211       for (i=0;i<n;i++) vals[i] = 0.;
212       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
213       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
214     }
215     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
216     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
217     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
218     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
219     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
220     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
221     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
222     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) {
223       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
224     }
225     ierr = PetscFree(vals);CHKERRQ(ierr);
226     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
227   }
228   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
229 
230   /* check PCBDDCBenignGetOrSetP0 */
231   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
232   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
233   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
234   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
235   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
236   for (i=0;i<pcbddc->benign_n;i++) {
237     if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) {
238       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr);
239     }
240   }
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
246 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
247 {
248   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
249   IS             pressures,zerodiag,*zerodiag_subs;
250   PetscInt       nz,n;
251   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag;
252   PetscErrorCode ierr;
253 
254   PetscFunctionBegin;
255   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
256   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
257   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
258   /* if a local info on dofs is present, assumes the last field is represented by "pressures"
259      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
260      Checks if all the pressure dofs in each subdomain have a zero diagonal
261      If not, a change of basis on pressures is not needed
262      since the local Schur complements are SPD
263   */
264   has_null_pressures = PETSC_TRUE;
265   have_null = PETSC_TRUE;
266   if (pcbddc->n_ISForDofsLocal) {
267     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
268 
269     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
270     ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr);
271     ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
272     ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr);
273     ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
274     ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr);
275     if (!sorted) {
276       ierr = ISSort(pressures);CHKERRQ(ierr);
277     }
278   } else {
279     pressures = NULL;
280   }
281   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
282   ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr);
283   /* TODO: add check for shared dofs and raise error */
284   ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr);
285   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
286   if (!sorted) {
287     ierr = ISSort(zerodiag);CHKERRQ(ierr);
288   }
289   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
290   if (!nz) {
291     if (n) have_null = PETSC_FALSE;
292     has_null_pressures = PETSC_FALSE;
293     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
294   }
295   recompute_zerodiag = PETSC_FALSE;
296   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
297   zerodiag_subs = NULL;
298   pcbddc->benign_n = 0;
299   if (has_null_pressures) {
300     IS       *subs;
301     PetscInt nsubs,i;
302 
303     subs = pcbddc->local_subs;
304     nsubs = pcbddc->n_local_subs;
305     if (nsubs > 1) {
306       ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr);
307       for (i=0;i<nsubs;i++) {
308         ISLocalToGlobalMapping l2g;
309         IS                     t_zerodiag_subs;
310         PetscInt               nl;
311 
312         ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr);
313         ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr);
314         ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr);
315         if (nl) {
316           PetscBool valid = PETSC_TRUE;
317 
318           if (pressures) {
319             IS t_pressure_subs;
320             ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr);
321             ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr);
322             ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr);
323           }
324           if (valid) {
325             ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr);
326             pcbddc->benign_n++;
327           } else {
328             recompute_zerodiag = PETSC_TRUE;
329           }
330         }
331         ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr);
332         ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr);
333       }
334     } else { /* there's just one subdomain (or zero if they have not been detected */
335       PetscBool valid = PETSC_TRUE;
336       if (pressures) {
337         ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr);
338       }
339       if (valid) {
340         pcbddc->benign_n = 1;
341         ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr);
342         ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr);
343         zerodiag_subs[0] = zerodiag;
344       }
345     }
346   }
347 
348   if (!pcbddc->benign_n) {
349     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
350     recompute_zerodiag = PETSC_FALSE;
351     has_null_pressures = PETSC_FALSE;
352     have_null = PETSC_FALSE;
353   }
354 
355   /* final check for null pressures */
356   if (zerodiag && pressures) {
357     PetscInt nz,np;
358     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
359     ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr);
360     if (nz != np) have_null = PETSC_FALSE;
361   }
362 
363   if (recompute_zerodiag) {
364     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
365     if (pcbddc->benign_n == 1) {
366       ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr);
367       zerodiag = zerodiag_subs[0];
368     } else {
369       PetscInt i,nzn,*new_idxs;
370 
371       nzn = 0;
372       for (i=0;i<pcbddc->benign_n;i++) {
373         PetscInt ns;
374         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
375         nzn += ns;
376       }
377       ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr);
378       nzn = 0;
379       for (i=0;i<pcbddc->benign_n;i++) {
380         PetscInt ns,*idxs;
381         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
382         ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
383         ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr);
384         ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
385         nzn += ns;
386       }
387       ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr);
388       ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr);
389     }
390     have_null = PETSC_FALSE;
391   }
392 
393   if (has_null_pressures) {
394     IS             zerodiagc;
395     const PetscInt *idxs,*idxsc;
396     PetscInt       i,s,*nnz;
397 
398     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
399     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
400     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
401     /* local change of basis for pressures */
402     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
403     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr);
404     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
405     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
406     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
407     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
408     for (i=0;i<pcbddc->benign_n;i++) {
409       PetscInt nzs,j;
410 
411       ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr);
412       ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
413       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
414       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
415       ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
416     }
417     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
418     ierr = PetscFree(nnz);CHKERRQ(ierr);
419     /* set identity on velocities */
420     for (i=0;i<n-nz;i++) {
421       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
422     }
423     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
424     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
425     ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
426     ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr);
427     /* set change on pressures */
428     for (s=0;s<pcbddc->benign_n;s++) {
429       PetscScalar *array;
430       PetscInt    nzs;
431 
432       ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr);
433       ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
434       for (i=0;i<nzs-1;i++) {
435         PetscScalar vals[2];
436         PetscInt    cols[2];
437 
438         cols[0] = idxs[i];
439         cols[1] = idxs[nzs-1];
440         vals[0] = 1.;
441         vals[1] = 1.;
442         ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
443       }
444       ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr);
445       for (i=0;i<nzs-1;i++) array[i] = -1.;
446       array[nzs-1] = 1.;
447       ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
448       /* store local idxs for p0 */
449       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
450       ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
451       ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr);
452       ierr = PetscFree(array);CHKERRQ(ierr);
453     }
454     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
455     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
456     /* TODO: need optimization? */
457     ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr);
458     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
459     ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr);
460     ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
461     /* store global idxs for p0 */
462     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
463   }
464   ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr);
465   ierr = ISDestroy(&pressures);CHKERRQ(ierr);
466 
467   /* determines if the coarse solver will be singular or not */
468   ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
469   *zerodiaglocal = zerodiag;
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
475 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
476 {
477   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
478   PetscErrorCode ierr;
479 
480   PetscFunctionBegin;
481   if (!pcbddc->benign_sf) {
482     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
483     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
484   }
485   if (get) { /* use SF to get values */
486     PetscScalar *array;
487 
488     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
489     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
490     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
491     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
492   } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */
493     ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
494     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
495     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
496   }
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
502 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
503 {
504   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
505   PetscErrorCode ierr;
506 
507   PetscFunctionBegin;
508   /* TODO: add error checking
509     - avoid nested pop (or push) calls.
510     - cannot push before pop.
511     - cannot call this if pcbddc->local_mat is NULL
512   */
513   if (!pcbddc->benign_n) {
514     PetscFunctionReturn(0);
515   }
516   if (pop) {
517     IS       is_p0;
518     MatReuse reuse;
519 
520     /* extract B_0 */
521     reuse = MAT_INITIAL_MATRIX;
522     if (pcbddc->benign_B0) {
523       reuse = MAT_REUSE_MATRIX;
524     }
525     ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr);
526     ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr);
527     /* remove rows and cols from local problem */
528     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
529     ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
530     ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr);
531     ierr = ISDestroy(&is_p0);CHKERRQ(ierr);
532   } else { /* push */
533     PetscInt i;
534 
535     for (i=0;i<pcbddc->benign_n;i++) {
536       PetscScalar *B0_vals;
537       PetscInt    *B0_cols,B0_ncol;
538 
539       ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
540       ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
541       ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
542       ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr);
543       ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
544     }
545     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547   }
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "PCBDDCAdaptiveSelection"
553 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
554 {
555   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
556   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
557   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
558   PetscBLASInt    *B_iwork,*B_ifail;
559   PetscScalar     *work,lwork;
560   PetscScalar     *St,*S,*eigv;
561   PetscScalar     *Sarray,*Starray;
562   PetscReal       *eigs,thresh;
563   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
564   PetscBool       allocated_S_St;
565 #if defined(PETSC_USE_COMPLEX)
566   PetscReal       *rwork;
567 #endif
568   PetscErrorCode  ierr;
569 
570   PetscFunctionBegin;
571   if (!sub_schurs->use_mumps) {
572     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
573   }
574 
575   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
576     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
577   }
578 
579   if (pcbddc->dbg_flag) {
580     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
581     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
582     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
583     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
584   }
585 
586   if (pcbddc->dbg_flag) {
587     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
588   }
589 
590   /* max size of subsets */
591   mss = 0;
592   for (i=0;i<sub_schurs->n_subs;i++) {
593     PetscInt subset_size;
594 
595     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
596     mss = PetscMax(mss,subset_size);
597   }
598 
599   /* min/max and threshold */
600   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
601   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
602   nmax = PetscMax(nmin,nmax);
603   allocated_S_St = PETSC_FALSE;
604   if (nmin) {
605     allocated_S_St = PETSC_TRUE;
606   }
607 
608   /* allocate lapack workspace */
609   cum = cum2 = 0;
610   maxneigs = 0;
611   for (i=0;i<sub_schurs->n_subs;i++) {
612     PetscInt n,subset_size;
613 
614     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
615     n = PetscMin(subset_size,nmax);
616     cum += subset_size;
617     cum2 += subset_size*n;
618     maxneigs = PetscMax(maxneigs,n);
619   }
620   if (mss) {
621     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
622       PetscBLASInt B_itype = 1;
623       PetscBLASInt B_N = mss;
624       PetscReal    zero = 0.0;
625       PetscReal    eps = 0.0; /* dlamch? */
626 
627       B_lwork = -1;
628       S = NULL;
629       St = NULL;
630       eigs = NULL;
631       eigv = NULL;
632       B_iwork = NULL;
633       B_ifail = NULL;
634 #if defined(PETSC_USE_COMPLEX)
635       rwork = NULL;
636 #endif
637       thresh = 1.0;
638       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
639 #if defined(PETSC_USE_COMPLEX)
640       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
641 #else
642       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
643 #endif
644       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
645       ierr = PetscFPTrapPop();CHKERRQ(ierr);
646     } else {
647         /* TODO */
648     }
649   } else {
650     lwork = 0;
651   }
652 
653   nv = 0;
654   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
655     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
656   }
657   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
658   if (allocated_S_St) {
659     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
660   }
661   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
662 #if defined(PETSC_USE_COMPLEX)
663   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
664 #endif
665   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
666                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
667                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
668                       nv+cum,&pcbddc->adaptive_constraints_idxs,
669                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
670   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
671 
672   maxneigs = 0;
673   cum = cum2 = cumarray = 0;
674   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
675   pcbddc->adaptive_constraints_data_ptr[0] = 0;
676   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
677     const PetscInt *idxs;
678 
679     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
680     for (cum=0;cum<nv;cum++) {
681       pcbddc->adaptive_constraints_n[cum] = 1;
682       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
683       pcbddc->adaptive_constraints_data[cum] = 1.0;
684       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
685       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
686     }
687     cum2 = cum;
688     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
689   }
690 
691   if (mss) { /* multilevel */
692     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
693     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
694   }
695 
696   thresh = pcbddc->adaptive_threshold;
697   for (i=0;i<sub_schurs->n_subs;i++) {
698     const PetscInt *idxs;
699     PetscReal      infty = PETSC_MAX_REAL;
700     PetscInt       j,subset_size,eigs_start = 0;
701     PetscBLASInt   B_N;
702     PetscBool      same_data = PETSC_FALSE;
703 
704     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
705     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
706     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
707     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
708       if (sub_schurs->is_hermitian) {
709         PetscInt j,k;
710         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
711           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
712           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
713         }
714         for (j=0;j<subset_size;j++) {
715           for (k=j;k<subset_size;k++) {
716             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
717             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
718           }
719         }
720       } else {
721         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
722         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
723       }
724     } else {
725       S = Sarray + cumarray;
726       St = Starray + cumarray;
727     }
728 
729     /* see if we can save some work */
730     if (sub_schurs->n_subs == 1) {
731       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
732     }
733 
734     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
735       B_neigs = 0;
736     } else {
737       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
738         PetscBLASInt B_itype = 1;
739         PetscBLASInt B_IL, B_IU;
740         PetscReal    eps = -1.0; /* dlamch? */
741         PetscInt     nmin_s;
742 
743         if (pcbddc->dbg_flag) {
744           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
745         }
746 
747         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
748         if (thresh > 1.+PETSC_SMALL) {
749 
750           /* ask for eigenvalues larger than thresh */
751 #if defined(PETSC_USE_COMPLEX)
752           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
753 #else
754           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
755 #endif
756         } else {
757           B_IU = PetscMax(1,PetscMin(B_N,nmax));
758           B_IL = 1;
759 #if defined(PETSC_USE_COMPLEX)
760           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
761 #else
762           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
763 #endif
764         }
765         ierr = PetscFPTrapPop();CHKERRQ(ierr);
766         if (B_ierr) {
767           if (B_ierr < 0 ) {
768             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
769           } else if (B_ierr <= B_N) {
770             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
771           } else {
772             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
773           }
774         }
775 
776         if (B_neigs > nmax) {
777           if (pcbddc->dbg_flag) {
778             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
779           }
780           eigs_start = B_neigs -nmax;
781           B_neigs = nmax;
782         }
783 
784         nmin_s = PetscMin(nmin,B_N);
785         if (B_neigs < nmin_s) {
786           PetscBLASInt B_neigs2;
787 
788           B_IU = B_N - B_neigs;
789           B_IL = B_N - nmin_s + 1;
790           if (pcbddc->dbg_flag) {
791             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
792           }
793           if (sub_schurs->is_hermitian) {
794             PetscInt j;
795             for (j=0;j<subset_size;j++) {
796               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
797             }
798             for (j=0;j<subset_size;j++) {
799               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
800             }
801           } else {
802             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
803             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
804           }
805           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
806 #if defined(PETSC_USE_COMPLEX)
807           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
808 #else
809           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
810 #endif
811           ierr = PetscFPTrapPop();CHKERRQ(ierr);
812           B_neigs += B_neigs2;
813         }
814         if (B_ierr) {
815           if (B_ierr < 0 ) {
816             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
817           } else if (B_ierr <= B_N) {
818             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
819           } else {
820             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
821           }
822         }
823         if (pcbddc->dbg_flag) {
824           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
825           for (j=0;j<B_neigs;j++) {
826             if (eigs[j] == 0.0) {
827               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
828             } else {
829               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
830             }
831           }
832         }
833       } else {
834           /* TODO */
835       }
836     }
837     maxneigs = PetscMax(B_neigs,maxneigs);
838     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
839     if (B_neigs) {
840       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
841 
842       if (pcbddc->dbg_flag > 1) {
843         PetscInt ii;
844         for (ii=0;ii<B_neigs;ii++) {
845           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
846           for (j=0;j<B_N;j++) {
847 #if defined(PETSC_USE_COMPLEX)
848             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
849             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
850             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
851 #else
852             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
853 #endif
854           }
855         }
856       }
857 #if 0
858       for (j=0;j<B_neigs;j++) {
859         PetscBLASInt Blas_N,Blas_one = 1.0;
860         PetscScalar norm;
861         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
862         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
863                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
864         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
865           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
866         } else {
867           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
868         }
869         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
870       }
871 #endif
872       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
873       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
874       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
875       cum++;
876     }
877     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
878     /* shift for next computation */
879     cumarray += subset_size*subset_size;
880   }
881   if (pcbddc->dbg_flag) {
882     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
883   }
884 
885   if (mss) {
886     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
887     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
888     /* destroy matrices (junk) */
889     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
890     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
891   }
892   if (allocated_S_St) {
893     ierr = PetscFree2(S,St);CHKERRQ(ierr);
894   }
895   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
896 #if defined(PETSC_USE_COMPLEX)
897   ierr = PetscFree(rwork);CHKERRQ(ierr);
898 #endif
899   if (pcbddc->dbg_flag) {
900     PetscInt maxneigs_r;
901     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
902     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
903   }
904   PetscFunctionReturn(0);
905 }
906 
907 #undef __FUNCT__
908 #define __FUNCT__ "PCBDDCSetUpSolvers"
909 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
910 {
911   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
912   PetscScalar    *coarse_submat_vals;
913   PetscErrorCode ierr;
914 
915   PetscFunctionBegin;
916   /* Setup local scatters R_to_B and (optionally) R_to_D */
917   /* PCBDDCSetUpLocalWorkVectors should be called first! */
918   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
919 
920   /* Setup local neumann solver ksp_R */
921   /* PCBDDCSetUpLocalScatters should be called first! */
922   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
923 
924   /* Change global null space passed in by the user if change of basis has been requested */
925   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
926     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
927   }
928 
929   /*
930      Setup local correction and local part of coarse basis.
931      Gives back the dense local part of the coarse matrix in column major ordering
932   */
933   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
934 
935   /* Compute total number of coarse nodes and setup coarse solver */
936   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
937 
938   /* free */
939   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 #undef __FUNCT__
944 #define __FUNCT__ "PCBDDCResetCustomization"
945 PetscErrorCode PCBDDCResetCustomization(PC pc)
946 {
947   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
948   PetscErrorCode ierr;
949 
950   PetscFunctionBegin;
951   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
952   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
953   ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
954   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
955   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
956   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
957   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
958   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
959   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
960   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
961   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
962   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "PCBDDCResetTopography"
968 PetscErrorCode PCBDDCResetTopography(PC pc)
969 {
970   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
971   PetscInt       i;
972   PetscErrorCode ierr;
973 
974   PetscFunctionBegin;
975   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
976   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
977   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
978   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
979   for (i=0;i<pcbddc->n_local_subs;i++) {
980     ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr);
981   }
982   ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr);
983   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "PCBDDCResetSolvers"
989 PetscErrorCode PCBDDCResetSolvers(PC pc)
990 {
991   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
992   PetscScalar    *array;
993   PetscErrorCode ierr;
994 
995   PetscFunctionBegin;
996   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
997   if (pcbddc->coarse_phi_B) {
998     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
999     ierr = PetscFree(array);CHKERRQ(ierr);
1000   }
1001   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1002   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1003   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1004   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1005   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1006   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1007   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1008   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1009   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1010   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1011   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1012   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1013   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1014   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
1015   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
1016   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
1017   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
1018   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1019   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1020   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1021   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
1022   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
1023   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
1024   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
1025   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
1026   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
1027   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
1028   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
1029   ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 #undef __FUNCT__
1034 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
1035 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
1036 {
1037   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1038   PC_IS          *pcis = (PC_IS*)pc->data;
1039   VecType        impVecType;
1040   PetscInt       n_constraints,n_R,old_size;
1041   PetscErrorCode ierr;
1042 
1043   PetscFunctionBegin;
1044   if (!pcbddc->ConstraintMatrix) {
1045     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
1046   }
1047   /* get sizes */
1048   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
1049   n_R = pcis->n - pcbddc->n_vertices;
1050   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
1051   /* local work vectors (try to avoid unneeded work)*/
1052   /* R nodes */
1053   old_size = -1;
1054   if (pcbddc->vec1_R) {
1055     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
1056   }
1057   if (n_R != old_size) {
1058     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1059     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1060     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
1061     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
1062     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
1063     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
1064   }
1065   /* local primal dofs */
1066   old_size = -1;
1067   if (pcbddc->vec1_P) {
1068     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
1069   }
1070   if (pcbddc->local_primal_size != old_size) {
1071     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1072     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
1073     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
1074     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
1075   }
1076   /* local explicit constraints */
1077   old_size = -1;
1078   if (pcbddc->vec1_C) {
1079     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
1080   }
1081   if (n_constraints && n_constraints != old_size) {
1082     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1083     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
1084     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
1085     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "PCBDDCSetUpCorrection"
1092 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
1093 {
1094   PetscErrorCode  ierr;
1095   /* pointers to pcis and pcbddc */
1096   PC_IS*          pcis = (PC_IS*)pc->data;
1097   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1098   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1099   /* submatrices of local problem */
1100   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
1101   /* submatrices of benign trick */
1102   Mat             B0_V = NULL;
1103   /* submatrices of local coarse problem */
1104   Mat             S_VV,S_CV,S_VC,S_CC;
1105   /* working matrices */
1106   Mat             C_CR;
1107   /* additional working stuff */
1108   PC              pc_R;
1109   Mat             F;
1110   PetscBool       isLU,isCHOL,isILU;
1111 
1112   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
1113   PetscScalar     *work;
1114   PetscInt        *idx_V_B;
1115   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
1116   PetscInt        i,n_R,n_D,n_B;
1117 
1118   /* some shortcuts to scalars */
1119   PetscScalar     one=1.0,m_one=-1.0;
1120 
1121   PetscFunctionBegin;
1122   if (!pcbddc->symmetric_primal && pcbddc->benign_n) {
1123     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
1124   }
1125 
1126   /* Set Non-overlapping dimensions */
1127   n_vertices = pcbddc->n_vertices;
1128   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
1129   n_B = pcis->n_B;
1130   n_D = pcis->n - n_B;
1131   n_R = pcis->n - n_vertices;
1132 
1133   /* vertices in boundary numbering */
1134   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
1135   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
1136   if (i != n_vertices) {
1137     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
1138   }
1139 
1140   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
1141   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
1142   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
1143   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
1144   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
1145   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
1146   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
1147   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
1148   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
1149   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
1150 
1151   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
1152   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
1153   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
1154   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
1155   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
1156   lda_rhs = n_R;
1157   if (isLU || isILU || isCHOL) {
1158     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
1159   } else if (sub_schurs->reuse_mumps) {
1160     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1161     MatFactorType type;
1162 
1163     F = reuse_mumps->F;
1164     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
1165     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
1166     ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr);
1167   } else {
1168     F = NULL;
1169   }
1170 
1171   /* allocate workspace */
1172   n = 0;
1173   if (n_constraints) {
1174     n += lda_rhs*n_constraints;
1175   }
1176   if (n_vertices) {
1177     n = PetscMax(2*lda_rhs*n_vertices,n);
1178     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
1179   }
1180   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1181 
1182   /* Precompute stuffs needed for preprocessing and application of BDDC*/
1183   if (n_constraints) {
1184     Mat         M1,M2,M3;
1185     Mat         auxmat;
1186     IS          is_aux;
1187     PetscScalar *array,*array2;
1188 
1189     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1190     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1191 
1192     /* Extract constraints on R nodes: C_{CR}  */
1193     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
1194     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
1195     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
1196 
1197     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
1198     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
1199     ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1200     for (i=0;i<n_constraints;i++) {
1201       const PetscScalar *row_cmat_values;
1202       const PetscInt    *row_cmat_indices;
1203       PetscInt          size_of_constraint,j;
1204 
1205       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1206       for (j=0;j<size_of_constraint;j++) {
1207         work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
1208       }
1209       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1210     }
1211     ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
1212     if (F) {
1213       Mat B;
1214 
1215       ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr);
1216       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
1217       ierr = MatDestroy(&B);CHKERRQ(ierr);
1218     } else {
1219       PetscScalar *marr;
1220 
1221       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1222       for (i=0;i<n_constraints;i++) {
1223         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr);
1224         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr);
1225         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1226         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1227         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1228       }
1229       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1230     }
1231     if (!pcbddc->switch_static) {
1232       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1233       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1234       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1235       for (i=0;i<n_constraints;i++) {
1236         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr);
1237         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
1238         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1241         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1242       }
1243       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1244       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1245       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1246     } else {
1247       if (lda_rhs != n_R) {
1248         IS dummy;
1249 
1250         ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr);
1251         ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1252         ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1253       } else {
1254         ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
1255         pcbddc->local_auxmat2 = local_auxmat2_R;
1256       }
1257       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1258     }
1259     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1260     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
1261     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
1262     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
1263     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
1264     if (isCHOL) {
1265       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
1266     } else {
1267       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
1268     }
1269     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
1270     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
1271     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
1272     ierr = MatDestroy(&M2);CHKERRQ(ierr);
1273     ierr = MatDestroy(&M3);CHKERRQ(ierr);
1274     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
1275     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
1276     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
1277     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
1278     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1279   }
1280   /* Get submatrices from subdomain matrix */
1281   if (pcbddc->benign_n) {
1282     IS        dummy;
1283     Mat       B0_R;
1284     PetscReal norm;
1285 
1286     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1287     ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
1288     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
1289     if (norm > PETSC_SMALL) {
1290       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
1291     }
1292     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1293     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1294   }
1295 
1296   if (n_vertices) {
1297     IS is_aux;
1298 
1299     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
1300       IS tis;
1301 
1302       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
1303       ierr = ISSort(tis);CHKERRQ(ierr);
1304       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
1305       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1306     } else {
1307       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
1308     }
1309     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
1310     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
1311     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
1312     if (pcbddc->benign_n) {
1313       IS dummy;
1314 
1315       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1316       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1317       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1318     }
1319     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1320   }
1321 
1322   /* Matrix of coarse basis functions (local) */
1323   if (pcbddc->coarse_phi_B) {
1324     PetscInt on_B,on_primal,on_D=n_D;
1325     if (pcbddc->coarse_phi_D) {
1326       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1327     }
1328     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1329     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1330       PetscScalar *marray;
1331 
1332       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1333       ierr = PetscFree(marray);CHKERRQ(ierr);
1334       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1335       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1336       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1337       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1338     }
1339   }
1340 
1341   if (!pcbddc->coarse_phi_B) {
1342     PetscScalar *marray;
1343 
1344     n = n_B*pcbddc->local_primal_size;
1345     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1346       n += n_D*pcbddc->local_primal_size;
1347     }
1348     if (!pcbddc->symmetric_primal) {
1349       n *= 2;
1350     }
1351     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1352     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1353     n = n_B*pcbddc->local_primal_size;
1354     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1355       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1356       n += n_D*pcbddc->local_primal_size;
1357     }
1358     if (!pcbddc->symmetric_primal) {
1359       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1360       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1361         n = n_B*pcbddc->local_primal_size;
1362         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1363       }
1364     } else {
1365       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1366       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1367       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1368         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1369         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1370       }
1371     }
1372   }
1373 
1374   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1375   p0_lidx_I = NULL;
1376   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1377     const PetscInt *idxs;
1378 
1379     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1380     ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr);
1381     for (i=0;i<pcbddc->benign_n;i++) {
1382       ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr);
1383     }
1384     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1385   }
1386 
1387   /* vertices */
1388   if (n_vertices) {
1389 
1390     ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1391 
1392     if (n_R) {
1393       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1394       PetscBLASInt B_N,B_one = 1;
1395       PetscScalar  *x,*y;
1396       PetscBool    isseqaij;
1397 
1398       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1399       if (lda_rhs == n_R) {
1400         ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1401       } else {
1402         Mat         A_RVt;
1403         PetscScalar *array;
1404 
1405         /* TODO optimize */
1406         ierr = MatConvert(A_RV,MATDENSE,MAT_INITIAL_MATRIX,&A_RVt);CHKERRQ(ierr);
1407         ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1408         ierr = PetscMemzero(work+lda_rhs*n_vertices,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1409         ierr = MatDenseGetArray(A_RVt,&array);CHKERRQ(ierr);
1410         for (i=0;i<n_vertices;i++) {
1411           ierr = PetscMemcpy(work+(n_vertices+i)*lda_rhs,array+i*n_R,n_R*sizeof(PetscScalar));CHKERRQ(ierr);
1412         }
1413         ierr = MatDenseRestoreArray(A_RVt,&array);CHKERRQ(ierr);
1414         ierr = MatDestroy(&A_RVt);CHKERRQ(ierr);
1415         ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&A_RV);CHKERRQ(ierr);
1416       }
1417       ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1418       if (F) { /* TODO could be optimized for symmetric problems */
1419         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1420       } else {
1421         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1422         for (i=0;i<n_vertices;i++) {
1423           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr);
1424           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr);
1425           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1426           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1427           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1428         }
1429         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1430       }
1431       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1432       /* S_VV and S_CV */
1433       if (n_constraints) {
1434         Mat B;
1435 
1436         ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1437         for (i=0;i<n_vertices;i++) {
1438           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr);
1439           ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr);
1440           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1441           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1442           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1443           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1444         }
1445         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr);
1446         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1447         ierr = MatDestroy(&B);CHKERRQ(ierr);
1448         ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr);
1449         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1450         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1451         ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr);
1452         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
1453         ierr = MatDestroy(&B);CHKERRQ(ierr);
1454       }
1455       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1456       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1457         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1458       }
1459       if (lda_rhs != n_R) {
1460         ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1461         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1462         ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr);
1463       }
1464       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1465       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1466       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1467       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1468       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1469       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1470       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1471       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1472       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1473       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1474     } else {
1475       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1476     }
1477     for (i=0;i<pcbddc->benign_n;i++) {
1478       const PetscScalar *vals;
1479       const PetscInt    *idxs;
1480       PetscInt          n,j,primal_idx;
1481 
1482       ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1483       primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i;
1484       for (j=0;j<n;j++) {
1485         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j];
1486         coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j];
1487       }
1488       ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1489     }
1490     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1491 
1492     /* coarse basis functions */
1493     for (i=0;i<n_vertices;i++) {
1494       PetscScalar *y;
1495 
1496       ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr);
1497       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1498       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1499       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1500       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1501       y[n_B*i+idx_V_B[i]] = 1.0;
1502       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1503       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1504 
1505       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1506         PetscInt j;
1507 
1508         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1509         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1510         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1513         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1514         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1515       }
1516       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1517     }
1518     /* if n_R == 0 the object is not destroyed */
1519     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1520   }
1521 
1522   if (n_constraints) {
1523     Mat B;
1524 
1525     ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr);
1526     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1527     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1528     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1529     if (n_vertices) {
1530       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1531         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1532       } else {
1533         Mat S_VCt;
1534 
1535         if (lda_rhs != n_R) {
1536           ierr = MatDestroy(&B);CHKERRQ(ierr);
1537           ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr);
1538           ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr);
1539         }
1540         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1541         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1542         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1543       }
1544     }
1545     ierr = MatDestroy(&B);CHKERRQ(ierr);
1546     /* coarse basis functions */
1547     for (i=0;i<n_constraints;i++) {
1548       PetscScalar *y;
1549 
1550       ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr);
1551       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1552       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1553       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1556       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1557       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1558         PetscInt j;
1559 
1560         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1561         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1562         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1564         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1565         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1566         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1567       }
1568       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1569     }
1570   }
1571   if (n_constraints) {
1572     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1573   }
1574   ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr);
1575   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1576 
1577   /* compute other basis functions for non-symmetric problems */
1578   if (!pcbddc->symmetric_primal) {
1579     Mat         B_V=NULL,B_C=NULL;
1580     PetscScalar *marray;
1581 
1582     if (n_constraints) {
1583       Mat S_CCT,C_CRT;
1584 
1585       ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr);
1586       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1587       ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1588       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1589       if (n_vertices) {
1590         Mat S_VCT;
1591 
1592         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1593         ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1594         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1595       }
1596       ierr = MatDestroy(&C_CRT);CHKERRQ(ierr);
1597     }
1598     if (n_vertices && n_R) {
1599       PetscScalar    *av,*marray;
1600       const PetscInt *xadj,*adjncy;
1601       PetscInt       n;
1602       PetscBool      flg_row;
1603 
1604       /* B_V = B_V - A_VR^T */
1605       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1606       ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1607       ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr);
1608       ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr);
1609       for (i=0;i<n;i++) {
1610         PetscInt j;
1611         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
1612       }
1613       ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr);
1614       ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1615       ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1616     }
1617 
1618     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1619     ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr);
1620     for (i=0;i<n_vertices;i++) {
1621       ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr);
1622       ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1623       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1624       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1625       ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1626     }
1627     ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr);
1628     ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr);
1629     for (i=n_vertices;i<n_constraints+n_vertices;i++) {
1630       ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr);
1631       ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1632       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1633       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1634       ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1635     }
1636     ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr);
1637     /* coarse basis functions */
1638     for (i=0;i<pcbddc->local_primal_size;i++) {
1639       PetscScalar *y;
1640 
1641       ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1642       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1643       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1644       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646       if (i<n_vertices) {
1647         y[n_B*i+idx_V_B[i]] = 1.0;
1648       }
1649       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1650       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1651 
1652       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1653         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1654         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1655         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1658         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1659       }
1660       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1661     }
1662     ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1663     ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1664   }
1665   /* free memory */
1666   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1667   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1668   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1669   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1670   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1671   ierr = PetscFree(work);CHKERRQ(ierr);
1672   if (n_vertices) {
1673     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1674   }
1675   if (n_constraints) {
1676     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1677   }
1678   /* Checking coarse_sub_mat and coarse basis functios */
1679   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1680   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1681   if (pcbddc->dbg_flag) {
1682     Mat         coarse_sub_mat;
1683     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1684     Mat         coarse_phi_D,coarse_phi_B;
1685     Mat         coarse_psi_D,coarse_psi_B;
1686     Mat         A_II,A_BB,A_IB,A_BI;
1687     Mat         C_B,CPHI;
1688     IS          is_dummy;
1689     Vec         mones;
1690     MatType     checkmattype=MATSEQAIJ;
1691     PetscReal   real_value;
1692 
1693     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1694     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1695     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1696     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1697     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1698     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1699     if (!pcbddc->symmetric_primal) {
1700       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1701       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1702     }
1703     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1704 
1705     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1706     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1707     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1708     if (!pcbddc->symmetric_primal) {
1709       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1710       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1711       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1712       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1713       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1714       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1715       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1716       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1717       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1718       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1719       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1720       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1721     } else {
1722       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1723       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1724       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1725       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1726       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1727       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1728       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1729       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1730     }
1731     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1732     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1733     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1734     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1735     if (pcbddc->benign_n) {
1736       Mat         B0_I,B0_B,B0_BPHI,B0_IPHI;
1737       PetscScalar *data,*data2;
1738       PetscInt    j;
1739 
1740       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
1741       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1742       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1743       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1744       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1745       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1746       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1747       for (j=0;j<pcbddc->benign_n;j++) {
1748         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
1749         for (i=0;i<pcbddc->local_primal_size;i++) {
1750           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
1751           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
1752         }
1753       }
1754       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1755       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1756       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1757       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1758       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1759       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1760       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1761       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1762       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1763     }
1764 #if 0
1765   {
1766     PetscViewer viewer;
1767     char filename[256];
1768     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
1769     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1770     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1771     ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr);
1772     ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr);
1773     ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr);
1774     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1775     ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr);
1776     ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr);
1777     if (pcbddc->coarse_phi_D) {
1778       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr);
1779       ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr);
1780     }
1781     if (pcbddc->coarse_psi_B) {
1782       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr);
1783       ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr);
1784     }
1785     if (pcbddc->coarse_phi_B) {
1786       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr);
1787       ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr);
1788     }
1789     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1790   }
1791 #endif
1792     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1793     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1794     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
1795     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1796 
1797     /* check constraints */
1798     if (!pcbddc->benign_n) { /* TODO: add benign case */
1799       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1800       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1801       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1802       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1803       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1804       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1805       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1806       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1807       if (!pcbddc->symmetric_primal) {
1808         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1809         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1810         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1811         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1812         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1813       }
1814       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1815       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1816       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1817       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1818     }
1819     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1820     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1821     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1822     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1823     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1824     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1825     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1826     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1827     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1828     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1829     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1830     if (!pcbddc->symmetric_primal) {
1831       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1832       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1833     }
1834     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1835   }
1836   /* get back data */
1837   *coarse_submat_vals_n = coarse_submat_vals;
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1843 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1844 {
1845   Mat            *work_mat;
1846   IS             isrow_s,iscol_s;
1847   PetscBool      rsorted,csorted;
1848   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1849   PetscErrorCode ierr;
1850 
1851   PetscFunctionBegin;
1852   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1853   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1854   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1855   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1856 
1857   if (!rsorted) {
1858     const PetscInt *idxs;
1859     PetscInt *idxs_sorted,i;
1860 
1861     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1862     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1863     for (i=0;i<rsize;i++) {
1864       idxs_perm_r[i] = i;
1865     }
1866     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1867     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1868     for (i=0;i<rsize;i++) {
1869       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1870     }
1871     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1872     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1873   } else {
1874     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1875     isrow_s = isrow;
1876   }
1877 
1878   if (!csorted) {
1879     if (isrow == iscol) {
1880       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1881       iscol_s = isrow_s;
1882     } else {
1883       const PetscInt *idxs;
1884       PetscInt *idxs_sorted,i;
1885 
1886       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1887       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1888       for (i=0;i<csize;i++) {
1889         idxs_perm_c[i] = i;
1890       }
1891       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1892       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1893       for (i=0;i<csize;i++) {
1894         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1895       }
1896       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1897       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1898     }
1899   } else {
1900     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1901     iscol_s = iscol;
1902   }
1903 
1904   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1905 
1906   if (!rsorted || !csorted) {
1907     Mat      new_mat;
1908     IS       is_perm_r,is_perm_c;
1909 
1910     if (!rsorted) {
1911       PetscInt *idxs_r,i;
1912       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1913       for (i=0;i<rsize;i++) {
1914         idxs_r[idxs_perm_r[i]] = i;
1915       }
1916       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1917       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1918     } else {
1919       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1920     }
1921     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1922 
1923     if (!csorted) {
1924       if (isrow_s == iscol_s) {
1925         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1926         is_perm_c = is_perm_r;
1927       } else {
1928         PetscInt *idxs_c,i;
1929         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1930         for (i=0;i<csize;i++) {
1931           idxs_c[idxs_perm_c[i]] = i;
1932         }
1933         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1934         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1935       }
1936     } else {
1937       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1938     }
1939     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1940 
1941     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1942     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1943     work_mat[0] = new_mat;
1944     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1945     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1946   }
1947 
1948   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1949   *B = work_mat[0];
1950   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1951   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1952   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1958 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1959 {
1960   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1961   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1962   Mat            new_mat;
1963   IS             is_local,is_global;
1964   PetscInt       local_size;
1965   PetscBool      isseqaij;
1966   PetscErrorCode ierr;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1970   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1971   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1972   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1973   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1974   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1975   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1976 
1977   /* check */
1978   if (pcbddc->dbg_flag) {
1979     Vec       x,x_change;
1980     PetscReal error;
1981 
1982     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1983     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1984     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1985     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1986     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1987     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1988     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1989     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1990     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1991     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1992     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1993     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1994     ierr = VecDestroy(&x);CHKERRQ(ierr);
1995     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1996   }
1997 
1998   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1999   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
2000   if (isseqaij) {
2001     Mat M;
2002 
2003     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
2004     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
2005     ierr = MatDestroy(&M);CHKERRQ(ierr);
2006   } else {
2007     Mat work_mat,M;
2008 
2009     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
2010     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
2011     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
2012     ierr = MatDestroy(&M);CHKERRQ(ierr);
2013     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
2014   }
2015   if (matis->A->symmetric_set) {
2016     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
2017 #if !defined(PETSC_USE_COMPLEX)
2018     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
2019 #endif
2020   }
2021   /*
2022   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2023   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
2024   */
2025   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
2031 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
2032 {
2033   PC_IS*          pcis = (PC_IS*)(pc->data);
2034   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
2035   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2036   PetscInt        *idx_R_local=NULL;
2037   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
2038   PetscInt        vbs,bs;
2039   PetscBT         bitmask=NULL;
2040   PetscErrorCode  ierr;
2041 
2042   PetscFunctionBegin;
2043   /*
2044     No need to setup local scatters if
2045       - primal space is unchanged
2046         AND
2047       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
2048         AND
2049       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
2050   */
2051   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
2052     PetscFunctionReturn(0);
2053   }
2054   /* destroy old objects */
2055   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
2056   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
2057   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
2058   /* Set Non-overlapping dimensions */
2059   n_B = pcis->n_B;
2060   n_D = pcis->n - n_B;
2061   n_vertices = pcbddc->n_vertices;
2062 
2063   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
2064 
2065   /* create auxiliary bitmask and allocate workspace */
2066   if (!sub_schurs->reuse_mumps) {
2067     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
2068     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
2069     for (i=0;i<n_vertices;i++) {
2070       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
2071     }
2072 
2073     for (i=0, n_R=0; i<pcis->n; i++) {
2074       if (!PetscBTLookup(bitmask,i)) {
2075         idx_R_local[n_R++] = i;
2076       }
2077     }
2078   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
2079     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2080 
2081     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2082     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
2083   }
2084 
2085   /* Block code */
2086   vbs = 1;
2087   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
2088   if (bs>1 && !(n_vertices%bs)) {
2089     PetscBool is_blocked = PETSC_TRUE;
2090     PetscInt  *vary;
2091     if (!sub_schurs->reuse_mumps) {
2092       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
2093       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
2094       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
2095       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
2096       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
2097       for (i=0; i<pcis->n/bs; i++) {
2098         if (vary[i]!=0 && vary[i]!=bs) {
2099           is_blocked = PETSC_FALSE;
2100           break;
2101         }
2102       }
2103       ierr = PetscFree(vary);CHKERRQ(ierr);
2104     } else {
2105       /* Verify directly the R set */
2106       for (i=0; i<n_R/bs; i++) {
2107         PetscInt j,node=idx_R_local[bs*i];
2108         for (j=1; j<bs; j++) {
2109           if (node != idx_R_local[bs*i+j]-j) {
2110             is_blocked = PETSC_FALSE;
2111             break;
2112           }
2113         }
2114       }
2115     }
2116     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
2117       vbs = bs;
2118       for (i=0;i<n_R/vbs;i++) {
2119         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
2120       }
2121     }
2122   }
2123   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
2124   if (sub_schurs->reuse_mumps) {
2125     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2126 
2127     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2128     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
2129     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
2130     reuse_mumps->is_R = pcbddc->is_R_local;
2131   } else {
2132     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
2133   }
2134 
2135   /* print some info if requested */
2136   if (pcbddc->dbg_flag) {
2137     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2138     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2139     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2140     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
2141     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
2142     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr);
2143     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2144   }
2145 
2146   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
2147   if (!sub_schurs->reuse_mumps) {
2148     IS       is_aux1,is_aux2;
2149     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
2150 
2151     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2152     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
2153     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
2154     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2155     for (i=0; i<n_D; i++) {
2156       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
2157     }
2158     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2159     for (i=0, j=0; i<n_R; i++) {
2160       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
2161         aux_array1[j++] = i;
2162       }
2163     }
2164     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2165     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2166     for (i=0, j=0; i<n_B; i++) {
2167       if (!PetscBTLookup(bitmask,is_indices[i])) {
2168         aux_array2[j++] = i;
2169       }
2170     }
2171     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2172     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
2173     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
2174     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2175     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
2176 
2177     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2178       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
2179       for (i=0, j=0; i<n_R; i++) {
2180         if (PetscBTLookup(bitmask,idx_R_local[i])) {
2181           aux_array1[j++] = i;
2182         }
2183       }
2184       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2185       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2186       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2187     }
2188     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
2189     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2190   } else {
2191     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2192     IS               tis;
2193     PetscInt         schur_size;
2194 
2195     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
2196     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
2197     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
2198     ierr = ISDestroy(&tis);CHKERRQ(ierr);
2199     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2200       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
2201       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2202       ierr = ISDestroy(&tis);CHKERRQ(ierr);
2203     }
2204   }
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
2211 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
2212 {
2213   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2214   PC_IS          *pcis = (PC_IS*)pc->data;
2215   PC             pc_temp;
2216   Mat            A_RR;
2217   MatReuse       reuse;
2218   PetscScalar    m_one = -1.0;
2219   PetscReal      value;
2220   PetscInt       n_D,n_R;
2221   PetscBool      use_exact,use_exact_reduced,issbaij;
2222   PetscErrorCode ierr;
2223   /* prefixes stuff */
2224   char           dir_prefix[256],neu_prefix[256],str_level[16];
2225   size_t         len;
2226 
2227   PetscFunctionBegin;
2228 
2229   /* compute prefixes */
2230   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
2231   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
2232   if (!pcbddc->current_level) {
2233     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2234     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2235     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2236     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2237   } else {
2238     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2239     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
2240     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2241     len -= 15; /* remove "pc_bddc_coarse_" */
2242     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
2243     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
2244     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2245     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2246     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2247     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2248     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
2249     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
2250   }
2251 
2252   /* DIRICHLET PROBLEM */
2253   if (dirichlet) {
2254     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2255     if (pcbddc->local_mat->symmetric_set) {
2256       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2257     }
2258     /* Matrix for Dirichlet problem is pcis->A_II */
2259     n_D = pcis->n - pcis->n_B;
2260     if (!pcbddc->ksp_D) { /* create object if not yet build */
2261       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
2262       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
2263       /* default */
2264       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
2265       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
2266       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2267       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2268       if (issbaij) {
2269         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2270       } else {
2271         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2272       }
2273       /* Allow user's customization */
2274       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
2275       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2276     }
2277     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
2278     if (sub_schurs->reuse_mumps) {
2279       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2280 
2281       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
2282     }
2283     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2284     if (!n_D) {
2285       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2286       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2287     }
2288     /* Set Up KSP for Dirichlet problem of BDDC */
2289     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
2290     /* set ksp_D into pcis data */
2291     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
2292     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
2293     pcis->ksp_D = pcbddc->ksp_D;
2294   }
2295 
2296   /* NEUMANN PROBLEM */
2297   A_RR = 0;
2298   if (neumann) {
2299     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2300     PetscInt        ibs,mbs;
2301     PetscBool       issbaij;
2302     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
2303     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
2304     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
2305     if (pcbddc->ksp_R) { /* already created ksp */
2306       PetscInt nn_R;
2307       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
2308       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2309       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
2310       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
2311         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
2312         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2313         reuse = MAT_INITIAL_MATRIX;
2314       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
2315         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
2316           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2317           reuse = MAT_INITIAL_MATRIX;
2318         } else { /* safe to reuse the matrix */
2319           reuse = MAT_REUSE_MATRIX;
2320         }
2321       }
2322       /* last check */
2323       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
2324         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2325         reuse = MAT_INITIAL_MATRIX;
2326       }
2327     } else { /* first time, so we need to create the matrix */
2328       reuse = MAT_INITIAL_MATRIX;
2329     }
2330     /* extract A_RR */
2331     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
2332     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
2333     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2334     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
2335       if (matis->A == pcbddc->local_mat) {
2336         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2337         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2338       } else {
2339         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2340       }
2341     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
2342       if (matis->A == pcbddc->local_mat) {
2343         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2344         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2345       } else {
2346         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2347       }
2348     }
2349     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
2350     if (pcbddc->local_mat->symmetric_set) {
2351       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2352     }
2353     if (!pcbddc->ksp_R) { /* create object if not present */
2354       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
2355       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
2356       /* default */
2357       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
2358       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
2359       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2360       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2361       if (issbaij) {
2362         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2363       } else {
2364         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2365       }
2366       /* Allow user's customization */
2367       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
2368       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2369     }
2370     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2371     if (!n_R) {
2372       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2373       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2374     }
2375     /* Reuse MUMPS solver if it is present */
2376     if (sub_schurs->reuse_mumps) {
2377       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2378 
2379       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2380     }
2381     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2382     /* Set Up KSP for Neumann problem of BDDC */
2383     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2384   }
2385   /* free Neumann problem's matrix */
2386   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2387 
2388   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2389   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2390     if (pcbddc->dbg_flag) {
2391       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2392       ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2393       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2394     }
2395     if (dirichlet) { /* Dirichlet */
2396       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2397       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2398       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2399       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2400       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2401       /* need to be adapted? */
2402       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2403       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2404       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2405       /* print info */
2406       if (pcbddc->dbg_flag) {
2407         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2408         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2409       }
2410       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2411         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2412       }
2413     }
2414     if (neumann) { /* Neumann */
2415       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2416       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2417       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2418       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2419       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2420       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2421       /* need to be adapted? */
2422       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2423       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2424       /* print info */
2425       if (pcbddc->dbg_flag) {
2426         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2427         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2428       }
2429       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2430         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2431       }
2432     }
2433   }
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2439 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2440 {
2441   PetscErrorCode  ierr;
2442   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2443   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2444 
2445   PetscFunctionBegin;
2446   if (!sub_schurs->reuse_mumps) {
2447     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2448   }
2449   if (!pcbddc->switch_static) {
2450     if (applytranspose && pcbddc->local_auxmat1) {
2451       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2452       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2453     }
2454     if (!sub_schurs->reuse_mumps) {
2455       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2456       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2457     } else {
2458       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2459 
2460       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2461       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2462     }
2463   } else {
2464     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2465     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2466     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2467     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2468     if (applytranspose && pcbddc->local_auxmat1) {
2469       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2470       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2471       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2472       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2473     }
2474   }
2475   if (!sub_schurs->reuse_mumps) {
2476     if (applytranspose) {
2477       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2478     } else {
2479       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2480     }
2481 #if defined(PETSC_HAVE_MUMPS)
2482   } else {
2483     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2484 
2485     if (applytranspose) {
2486       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2487     } else {
2488       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2489     }
2490 #endif
2491   }
2492   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2493   if (!pcbddc->switch_static) {
2494     if (!sub_schurs->reuse_mumps) {
2495       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2496       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2497     } else {
2498       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2499 
2500       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2501       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2502     }
2503     if (!applytranspose && pcbddc->local_auxmat1) {
2504       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2505       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2506     }
2507   } else {
2508     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2509     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2510     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2511     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2512     if (!applytranspose && pcbddc->local_auxmat1) {
2513       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2514       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2515     }
2516     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2517     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2518     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2519     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2520   }
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2525 #undef __FUNCT__
2526 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2527 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2528 {
2529   PetscErrorCode ierr;
2530   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2531   PC_IS*            pcis = (PC_IS*)  (pc->data);
2532   const PetscScalar zero = 0.0;
2533 
2534   PetscFunctionBegin;
2535   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2536   if (applytranspose) {
2537     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2538     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2539   } else {
2540     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2541     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2542   }
2543 
2544   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2545   if (pcbddc->benign_n) {
2546     PetscScalar *array;
2547     PetscInt    j;
2548 
2549     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2550     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
2551     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2552   }
2553 
2554   /* start communications from local primal nodes to rhs of coarse solver */
2555   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2556   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2557   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2558 
2559   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2560   /* TODO remove null space when doing multilevel */
2561   if (pcbddc->coarse_ksp) {
2562     Mat coarse_mat;
2563     Vec rhs,sol;
2564     MatNullSpace nullsp;
2565 
2566     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2567     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2568     ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
2569     ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr);
2570     if (nullsp) {
2571       ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr);
2572     }
2573     if (applytranspose) {
2574       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2575     } else {
2576       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2577     }
2578     if (nullsp) {
2579       ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr);
2580     }
2581   }
2582 
2583   /* Local solution on R nodes */
2584   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2585     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2586   }
2587 
2588   /* communications from coarse sol to local primal nodes */
2589   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2590   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2591 
2592   /* Sum contributions from two levels */
2593   if (applytranspose) {
2594     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2595     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2596   } else {
2597     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2598     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2599   }
2600   /* store p0 */
2601   if (pcbddc->benign_n) {
2602     PetscScalar *array;
2603     PetscInt    j;
2604 
2605     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2606     for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
2607     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2608   }
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 #undef __FUNCT__
2613 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2614 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2615 {
2616   PetscErrorCode ierr;
2617   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2618   PetscScalar    *array;
2619   Vec            from,to;
2620 
2621   PetscFunctionBegin;
2622   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2623     from = pcbddc->coarse_vec;
2624     to = pcbddc->vec1_P;
2625     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2626       Vec tvec;
2627 
2628       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2629       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2630       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2631       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2632       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2633       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2634     }
2635   } else { /* from local to global -> put data in coarse right hand side */
2636     from = pcbddc->vec1_P;
2637     to = pcbddc->coarse_vec;
2638   }
2639   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2640   PetscFunctionReturn(0);
2641 }
2642 
2643 #undef __FUNCT__
2644 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2645 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2646 {
2647   PetscErrorCode ierr;
2648   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2649   PetscScalar    *array;
2650   Vec            from,to;
2651 
2652   PetscFunctionBegin;
2653   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2654     from = pcbddc->coarse_vec;
2655     to = pcbddc->vec1_P;
2656   } else { /* from local to global -> put data in coarse right hand side */
2657     from = pcbddc->vec1_P;
2658     to = pcbddc->coarse_vec;
2659   }
2660   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2661   if (smode == SCATTER_FORWARD) {
2662     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2663       Vec tvec;
2664 
2665       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2666       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2667       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2668       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2669     }
2670   } else {
2671     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2672      ierr = VecResetArray(from);CHKERRQ(ierr);
2673     }
2674   }
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 /* uncomment for testing purposes */
2679 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2680 #undef __FUNCT__
2681 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2682 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2683 {
2684   PetscErrorCode    ierr;
2685   PC_IS*            pcis = (PC_IS*)(pc->data);
2686   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2687   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2688   /* one and zero */
2689   PetscScalar       one=1.0,zero=0.0;
2690   /* space to store constraints and their local indices */
2691   PetscScalar       *constraints_data;
2692   PetscInt          *constraints_idxs,*constraints_idxs_B;
2693   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2694   PetscInt          *constraints_n;
2695   /* iterators */
2696   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2697   /* BLAS integers */
2698   PetscBLASInt      lwork,lierr;
2699   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2700   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2701   /* reuse */
2702   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2703   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2704   /* change of basis */
2705   PetscBool         qr_needed;
2706   PetscBT           change_basis,qr_needed_idx;
2707   /* auxiliary stuff */
2708   PetscInt          *nnz,*is_indices;
2709   PetscInt          ncc;
2710   /* some quantities */
2711   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2712   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2713 
2714   PetscFunctionBegin;
2715   /* Destroy Mat objects computed previously */
2716   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2717   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2718   /* save info on constraints from previous setup (if any) */
2719   olocal_primal_size = pcbddc->local_primal_size;
2720   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2721   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2722   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2723   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2724   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2725   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2726 
2727   /* print some info */
2728   if (pcbddc->dbg_flag) {
2729     IS       vertices;
2730     PetscInt nv,nedges,nfaces;
2731     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2732     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2733     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2734     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2735     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2736     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2737     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2738     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2739     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2740     ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2741   }
2742 
2743   if (!pcbddc->adaptive_selection) {
2744     IS           ISForVertices,*ISForFaces,*ISForEdges;
2745     MatNullSpace nearnullsp;
2746     const Vec    *nearnullvecs;
2747     Vec          *localnearnullsp;
2748     PetscScalar  *array;
2749     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2750     PetscBool    nnsp_has_cnst;
2751     /* LAPACK working arrays for SVD or POD */
2752     PetscBool    skip_lapack,boolforchange;
2753     PetscScalar  *work;
2754     PetscReal    *singular_vals;
2755 #if defined(PETSC_USE_COMPLEX)
2756     PetscReal    *rwork;
2757 #endif
2758 #if defined(PETSC_MISSING_LAPACK_GESVD)
2759     PetscScalar  *temp_basis,*correlation_mat;
2760 #else
2761     PetscBLASInt dummy_int=1;
2762     PetscScalar  dummy_scalar=1.;
2763 #endif
2764 
2765     /* Get index sets for faces, edges and vertices from graph */
2766     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2767     /* free unneeded index sets */
2768     if (!pcbddc->use_vertices) {
2769       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2770     }
2771     if (!pcbddc->use_edges) {
2772       for (i=0;i<n_ISForEdges;i++) {
2773         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2774       }
2775       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2776       n_ISForEdges = 0;
2777     }
2778     if (!pcbddc->use_faces) {
2779       for (i=0;i<n_ISForFaces;i++) {
2780         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2781       }
2782       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2783       n_ISForFaces = 0;
2784     }
2785 
2786 #if defined(PETSC_USE_DEBUG)
2787     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2788        Also use_change_of_basis should be consistent among processors */
2789     if (pcbddc->NullSpace) {
2790       PetscBool tbool[2],gbool[2];
2791 
2792       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2793         pcbddc->use_change_of_basis = PETSC_TRUE;
2794         if (!ISForEdges) {
2795           pcbddc->use_change_on_faces = PETSC_TRUE;
2796         }
2797       }
2798       tbool[0] = pcbddc->use_change_of_basis;
2799       tbool[1] = pcbddc->use_change_on_faces;
2800       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2801       pcbddc->use_change_of_basis = gbool[0];
2802       pcbddc->use_change_on_faces = gbool[1];
2803     }
2804 #endif
2805 
2806     /* check if near null space is attached to global mat */
2807     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2808     if (nearnullsp) {
2809       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2810       /* remove any stored info */
2811       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2812       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2813       /* store information for BDDC solver reuse */
2814       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2815       pcbddc->onearnullspace = nearnullsp;
2816       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2817       for (i=0;i<nnsp_size;i++) {
2818         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2819       }
2820     } else { /* if near null space is not provided BDDC uses constants by default */
2821       nnsp_size = 0;
2822       nnsp_has_cnst = PETSC_TRUE;
2823     }
2824     /* get max number of constraints on a single cc */
2825     max_constraints = nnsp_size;
2826     if (nnsp_has_cnst) max_constraints++;
2827 
2828     /*
2829          Evaluate maximum storage size needed by the procedure
2830          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2831          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2832          There can be multiple constraints per connected component
2833                                                                                                                                                            */
2834     n_vertices = 0;
2835     if (ISForVertices) {
2836       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2837     }
2838     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2839     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2840 
2841     total_counts = n_ISForFaces+n_ISForEdges;
2842     total_counts *= max_constraints;
2843     total_counts += n_vertices;
2844     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2845 
2846     total_counts = 0;
2847     max_size_of_constraint = 0;
2848     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2849       IS used_is;
2850       if (i<n_ISForEdges) {
2851         used_is = ISForEdges[i];
2852       } else {
2853         used_is = ISForFaces[i-n_ISForEdges];
2854       }
2855       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2856       total_counts += j;
2857       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2858     }
2859     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2860 
2861     /* get local part of global near null space vectors */
2862     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2863     for (k=0;k<nnsp_size;k++) {
2864       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2865       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2866       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2867     }
2868 
2869     /* whether or not to skip lapack calls */
2870     skip_lapack = PETSC_TRUE;
2871     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2872 
2873     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2874     if (!skip_lapack) {
2875       PetscScalar temp_work;
2876 
2877 #if defined(PETSC_MISSING_LAPACK_GESVD)
2878       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2879       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2880       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2881       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2882 #if defined(PETSC_USE_COMPLEX)
2883       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2884 #endif
2885       /* now we evaluate the optimal workspace using query with lwork=-1 */
2886       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2887       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2888       lwork = -1;
2889       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2890 #if !defined(PETSC_USE_COMPLEX)
2891       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2892 #else
2893       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2894 #endif
2895       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2896       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2897 #else /* on missing GESVD */
2898       /* SVD */
2899       PetscInt max_n,min_n;
2900       max_n = max_size_of_constraint;
2901       min_n = max_constraints;
2902       if (max_size_of_constraint < max_constraints) {
2903         min_n = max_size_of_constraint;
2904         max_n = max_constraints;
2905       }
2906       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2907 #if defined(PETSC_USE_COMPLEX)
2908       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2909 #endif
2910       /* now we evaluate the optimal workspace using query with lwork=-1 */
2911       lwork = -1;
2912       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2913       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2914       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2915       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2916 #if !defined(PETSC_USE_COMPLEX)
2917       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2918 #else
2919       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2920 #endif
2921       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2922       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2923 #endif /* on missing GESVD */
2924       /* Allocate optimal workspace */
2925       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2926       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2927     }
2928     /* Now we can loop on constraining sets */
2929     total_counts = 0;
2930     constraints_idxs_ptr[0] = 0;
2931     constraints_data_ptr[0] = 0;
2932     /* vertices */
2933     if (n_vertices) {
2934       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2935       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2936       for (i=0;i<n_vertices;i++) {
2937         constraints_n[total_counts] = 1;
2938         constraints_data[total_counts] = 1.0;
2939         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2940         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2941         total_counts++;
2942       }
2943       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2944       n_vertices = total_counts;
2945     }
2946 
2947     /* edges and faces */
2948     total_counts_cc = total_counts;
2949     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2950       IS        used_is;
2951       PetscBool idxs_copied = PETSC_FALSE;
2952 
2953       if (ncc<n_ISForEdges) {
2954         used_is = ISForEdges[ncc];
2955         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2956       } else {
2957         used_is = ISForFaces[ncc-n_ISForEdges];
2958         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2959       }
2960       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2961 
2962       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2963       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2964       /* change of basis should not be performed on local periodic nodes */
2965       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2966       if (nnsp_has_cnst) {
2967         PetscScalar quad_value;
2968 
2969         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2970         idxs_copied = PETSC_TRUE;
2971 
2972         if (!pcbddc->use_nnsp_true) {
2973           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2974         } else {
2975           quad_value = 1.0;
2976         }
2977         for (j=0;j<size_of_constraint;j++) {
2978           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2979         }
2980         temp_constraints++;
2981         total_counts++;
2982       }
2983       for (k=0;k<nnsp_size;k++) {
2984         PetscReal real_value;
2985         PetscScalar *ptr_to_data;
2986 
2987         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2988         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2989         for (j=0;j<size_of_constraint;j++) {
2990           ptr_to_data[j] = array[is_indices[j]];
2991         }
2992         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2993         /* check if array is null on the connected component */
2994         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2995         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2996         if (real_value > 0.0) { /* keep indices and values */
2997           temp_constraints++;
2998           total_counts++;
2999           if (!idxs_copied) {
3000             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
3001             idxs_copied = PETSC_TRUE;
3002           }
3003         }
3004       }
3005       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3006       valid_constraints = temp_constraints;
3007       if (!pcbddc->use_nnsp_true && temp_constraints) {
3008         if (temp_constraints == 1) { /* just normalize the constraint */
3009           PetscScalar norm,*ptr_to_data;
3010 
3011           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
3012           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3013           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
3014           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
3015           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
3016         } else { /* perform SVD */
3017           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
3018           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
3019 
3020 #if defined(PETSC_MISSING_LAPACK_GESVD)
3021           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
3022              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
3023              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
3024                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
3025                 from that computed using LAPACKgesvd
3026              -> This is due to a different computation of eigenvectors in LAPACKheev
3027              -> The quality of the POD-computed basis will be the same */
3028           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
3029           /* Store upper triangular part of correlation matrix */
3030           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3031           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3032           for (j=0;j<temp_constraints;j++) {
3033             for (k=0;k<j+1;k++) {
3034               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
3035             }
3036           }
3037           /* compute eigenvalues and eigenvectors of correlation matrix */
3038           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3039           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
3040 #if !defined(PETSC_USE_COMPLEX)
3041           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
3042 #else
3043           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
3044 #endif
3045           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3046           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
3047           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
3048           j = 0;
3049           while (j < temp_constraints && singular_vals[j] < tol) j++;
3050           total_counts = total_counts-j;
3051           valid_constraints = temp_constraints-j;
3052           /* scale and copy POD basis into used quadrature memory */
3053           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3054           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3055           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
3056           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3057           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
3058           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3059           if (j<temp_constraints) {
3060             PetscInt ii;
3061             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
3062             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3063             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
3064             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3065             for (k=0;k<temp_constraints-j;k++) {
3066               for (ii=0;ii<size_of_constraint;ii++) {
3067                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
3068               }
3069             }
3070           }
3071 #else  /* on missing GESVD */
3072           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3073           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3074           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3075           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3076 #if !defined(PETSC_USE_COMPLEX)
3077           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
3078 #else
3079           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
3080 #endif
3081           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
3082           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3083           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
3084           k = temp_constraints;
3085           if (k > size_of_constraint) k = size_of_constraint;
3086           j = 0;
3087           while (j < k && singular_vals[k-j-1] < tol) j++;
3088           valid_constraints = k-j;
3089           total_counts = total_counts-temp_constraints+valid_constraints;
3090 #endif /* on missing GESVD */
3091         }
3092       }
3093       /* update pointers information */
3094       if (valid_constraints) {
3095         constraints_n[total_counts_cc] = valid_constraints;
3096         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
3097         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
3098         /* set change_of_basis flag */
3099         if (boolforchange) {
3100           PetscBTSet(change_basis,total_counts_cc);
3101         }
3102         total_counts_cc++;
3103       }
3104     }
3105     /* free workspace */
3106     if (!skip_lapack) {
3107       ierr = PetscFree(work);CHKERRQ(ierr);
3108 #if defined(PETSC_USE_COMPLEX)
3109       ierr = PetscFree(rwork);CHKERRQ(ierr);
3110 #endif
3111       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
3112 #if defined(PETSC_MISSING_LAPACK_GESVD)
3113       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
3114       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
3115 #endif
3116     }
3117     for (k=0;k<nnsp_size;k++) {
3118       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
3119     }
3120     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
3121     /* free index sets of faces, edges and vertices */
3122     for (i=0;i<n_ISForFaces;i++) {
3123       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
3124     }
3125     if (n_ISForFaces) {
3126       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
3127     }
3128     for (i=0;i<n_ISForEdges;i++) {
3129       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
3130     }
3131     if (n_ISForEdges) {
3132       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
3133     }
3134     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
3135   } else {
3136     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3137 
3138     total_counts = 0;
3139     n_vertices = 0;
3140     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3141       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
3142     }
3143     max_constraints = 0;
3144     total_counts_cc = 0;
3145     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3146       total_counts += pcbddc->adaptive_constraints_n[i];
3147       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
3148       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
3149     }
3150     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
3151     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
3152     constraints_idxs = pcbddc->adaptive_constraints_idxs;
3153     constraints_data = pcbddc->adaptive_constraints_data;
3154     /* constraints_n differs from pcbddc->adaptive_constraints_n */
3155     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
3156     total_counts_cc = 0;
3157     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3158       if (pcbddc->adaptive_constraints_n[i]) {
3159         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
3160       }
3161     }
3162 #if 0
3163     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
3164     for (i=0;i<total_counts_cc;i++) {
3165       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
3166       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
3167       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
3168         printf(" %d",constraints_idxs[j]);
3169       }
3170       printf("\n");
3171       printf("number of cc: %d\n",constraints_n[i]);
3172     }
3173     for (i=0;i<n_vertices;i++) {
3174       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
3175     }
3176     for (i=0;i<sub_schurs->n_subs;i++) {
3177       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
3178     }
3179 #endif
3180 
3181     max_size_of_constraint = 0;
3182     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
3183     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
3184     /* Change of basis */
3185     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
3186     if (pcbddc->use_change_of_basis) {
3187       for (i=0;i<sub_schurs->n_subs;i++) {
3188         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
3189           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
3190         }
3191       }
3192     }
3193   }
3194   pcbddc->local_primal_size = total_counts;
3195   ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3196 
3197   /* map constraints_idxs in boundary numbering */
3198   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
3199   if (i != constraints_idxs_ptr[total_counts_cc]) {
3200     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
3201   }
3202 
3203   /* Create constraint matrix */
3204   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
3205   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
3206   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
3207 
3208   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
3209   /* determine if a QR strategy is needed for change of basis */
3210   qr_needed = PETSC_FALSE;
3211   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
3212   total_primal_vertices=0;
3213   pcbddc->local_primal_size_cc = 0;
3214   for (i=0;i<total_counts_cc;i++) {
3215     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3216     if (size_of_constraint == 1) {
3217       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
3218       pcbddc->local_primal_size_cc += 1;
3219     } else if (PetscBTLookup(change_basis,i)) {
3220       for (k=0;k<constraints_n[i];k++) {
3221         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3222       }
3223       pcbddc->local_primal_size_cc += constraints_n[i];
3224       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
3225         PetscBTSet(qr_needed_idx,i);
3226         qr_needed = PETSC_TRUE;
3227       }
3228     } else {
3229       pcbddc->local_primal_size_cc += 1;
3230     }
3231   }
3232   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
3233   pcbddc->n_vertices = total_primal_vertices;
3234   /* permute indices in order to have a sorted set of vertices */
3235   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3236 
3237   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
3238   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
3239   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
3240 
3241   /* nonzero structure of constraint matrix */
3242   /* and get reference dof for local constraints */
3243   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
3244   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
3245 
3246   j = total_primal_vertices;
3247   total_counts = total_primal_vertices;
3248   cum = total_primal_vertices;
3249   for (i=n_vertices;i<total_counts_cc;i++) {
3250     if (!PetscBTLookup(change_basis,i)) {
3251       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
3252       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
3253       cum++;
3254       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3255       for (k=0;k<constraints_n[i];k++) {
3256         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3257         nnz[j+k] = size_of_constraint;
3258       }
3259       j += constraints_n[i];
3260     }
3261   }
3262   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
3263   ierr = PetscFree(nnz);CHKERRQ(ierr);
3264 
3265   /* set values in constraint matrix */
3266   for (i=0;i<total_primal_vertices;i++) {
3267     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3268   }
3269   total_counts = total_primal_vertices;
3270   for (i=n_vertices;i<total_counts_cc;i++) {
3271     if (!PetscBTLookup(change_basis,i)) {
3272       PetscInt *cols;
3273 
3274       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3275       cols = constraints_idxs+constraints_idxs_ptr[i];
3276       for (k=0;k<constraints_n[i];k++) {
3277         PetscInt    row = total_counts+k;
3278         PetscScalar *vals;
3279 
3280         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
3281         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3282       }
3283       total_counts += constraints_n[i];
3284     }
3285   }
3286   /* assembling */
3287   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289 
3290   /*
3291   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3292   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
3293   */
3294   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
3295   if (pcbddc->use_change_of_basis) {
3296     /* dual and primal dofs on a single cc */
3297     PetscInt     dual_dofs,primal_dofs;
3298     /* working stuff for GEQRF */
3299     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
3300     PetscBLASInt lqr_work;
3301     /* working stuff for UNGQR */
3302     PetscScalar  *gqr_work,lgqr_work_t;
3303     PetscBLASInt lgqr_work;
3304     /* working stuff for TRTRS */
3305     PetscScalar  *trs_rhs;
3306     PetscBLASInt Blas_NRHS;
3307     /* pointers for values insertion into change of basis matrix */
3308     PetscInt     *start_rows,*start_cols;
3309     PetscScalar  *start_vals;
3310     /* working stuff for values insertion */
3311     PetscBT      is_primal;
3312     PetscInt     *aux_primal_numbering_B;
3313     /* matrix sizes */
3314     PetscInt     global_size,local_size;
3315     /* temporary change of basis */
3316     Mat          localChangeOfBasisMatrix;
3317     /* extra space for debugging */
3318     PetscScalar  *dbg_work;
3319 
3320     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
3321     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
3322     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3323     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
3324     /* nonzeros for local mat */
3325     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
3326     for (i=0;i<pcis->n;i++) nnz[i]=1;
3327     for (i=n_vertices;i<total_counts_cc;i++) {
3328       if (PetscBTLookup(change_basis,i)) {
3329         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3330         if (PetscBTLookup(qr_needed_idx,i)) {
3331           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
3332         } else {
3333           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
3334           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
3335         }
3336       }
3337     }
3338     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
3339     ierr = PetscFree(nnz);CHKERRQ(ierr);
3340     /* Set initial identity in the matrix */
3341     for (i=0;i<pcis->n;i++) {
3342       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
3343     }
3344 
3345     if (pcbddc->dbg_flag) {
3346       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
3347       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3348     }
3349 
3350 
3351     /* Now we loop on the constraints which need a change of basis */
3352     /*
3353        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
3354        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
3355 
3356        Basic blocks of change of basis matrix T computed by
3357 
3358           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
3359 
3360             | 1        0   ...        0         s_1/S |
3361             | 0        1   ...        0         s_2/S |
3362             |              ...                        |
3363             | 0        ...            1     s_{n-1}/S |
3364             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
3365 
3366             with S = \sum_{i=1}^n s_i^2
3367             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
3368                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
3369 
3370           - QR decomposition of constraints otherwise
3371     */
3372     if (qr_needed) {
3373       /* space to store Q */
3374       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
3375       /* first we issue queries for optimal work */
3376       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3377       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
3378       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3379       lqr_work = -1;
3380       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
3381       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3382       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3383       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3384       lgqr_work = -1;
3385       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3386       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3387       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3388       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3389       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3390       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3391       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3392       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3393       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3394       /* array to store scaling factors for reflectors */
3395       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3396       /* array to store rhs and solution of triangular solver */
3397       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3398       /* allocating workspace for check */
3399       if (pcbddc->dbg_flag) {
3400         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3401       }
3402     }
3403     /* array to store whether a node is primal or not */
3404     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3405     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3406     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3407     if (i != total_primal_vertices) {
3408       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3409     }
3410     for (i=0;i<total_primal_vertices;i++) {
3411       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3412     }
3413     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3414 
3415     /* loop on constraints and see whether or not they need a change of basis and compute it */
3416     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3417       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3418       if (PetscBTLookup(change_basis,total_counts)) {
3419         /* get constraint info */
3420         primal_dofs = constraints_n[total_counts];
3421         dual_dofs = size_of_constraint-primal_dofs;
3422 
3423         if (pcbddc->dbg_flag) {
3424           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3425         }
3426 
3427         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3428 
3429           /* copy quadrature constraints for change of basis check */
3430           if (pcbddc->dbg_flag) {
3431             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3432           }
3433           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3434           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3435 
3436           /* compute QR decomposition of constraints */
3437           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3438           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3439           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3440           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3441           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3442           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3443           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3444 
3445           /* explictly compute R^-T */
3446           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3447           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3448           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3449           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3450           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3451           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3452           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3453           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3454           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3455           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3456 
3457           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3458           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3459           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3460           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3461           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3462           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3463           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3464           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3465           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3466 
3467           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3468              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3469              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3470           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3471           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3472           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3473           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3474           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3475           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3476           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3477           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3478           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3479           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3480 
3481           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3482           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3483           /* insert cols for primal dofs */
3484           for (j=0;j<primal_dofs;j++) {
3485             start_vals = &qr_basis[j*size_of_constraint];
3486             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3487             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3488           }
3489           /* insert cols for dual dofs */
3490           for (j=0,k=0;j<dual_dofs;k++) {
3491             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3492               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3493               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3494               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3495               j++;
3496             }
3497           }
3498 
3499           /* check change of basis */
3500           if (pcbddc->dbg_flag) {
3501             PetscInt   ii,jj;
3502             PetscBool valid_qr=PETSC_TRUE;
3503             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3504             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3505             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3506             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3507             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3508             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3509             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3510             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3511             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3512             for (jj=0;jj<size_of_constraint;jj++) {
3513               for (ii=0;ii<primal_dofs;ii++) {
3514                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3515                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3516               }
3517             }
3518             if (!valid_qr) {
3519               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3520               for (jj=0;jj<size_of_constraint;jj++) {
3521                 for (ii=0;ii<primal_dofs;ii++) {
3522                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3523                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3524                   }
3525                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3526                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3527                   }
3528                 }
3529               }
3530             } else {
3531               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3532             }
3533           }
3534         } else { /* simple transformation block */
3535           PetscInt    row,col;
3536           PetscScalar val,norm;
3537 
3538           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3539           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3540           for (j=0;j<size_of_constraint;j++) {
3541             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3542             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3543             if (!PetscBTLookup(is_primal,row_B)) {
3544               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3545               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3546               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3547             } else {
3548               for (k=0;k<size_of_constraint;k++) {
3549                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3550                 if (row != col) {
3551                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3552                 } else {
3553                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3554                 }
3555                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3556               }
3557             }
3558           }
3559           if (pcbddc->dbg_flag) {
3560             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3561           }
3562         }
3563       } else {
3564         if (pcbddc->dbg_flag) {
3565           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3566         }
3567       }
3568     }
3569 
3570     /* free workspace */
3571     if (qr_needed) {
3572       if (pcbddc->dbg_flag) {
3573         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3574       }
3575       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3576       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3577       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3578       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3579       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3580     }
3581     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3582     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3583     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3584 
3585     /* assembling of global change of variable */
3586     {
3587       Mat      tmat;
3588       PetscInt bs;
3589 
3590       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3591       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3592       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3593       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3594       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3595       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3596       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3597       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3598       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3599       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3600       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3601       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3602       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3603       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3604       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3605       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3606       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3607       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3608     }
3609     /* check */
3610     if (pcbddc->dbg_flag) {
3611       PetscReal error;
3612       Vec       x,x_change;
3613 
3614       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3615       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3616       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3617       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3618       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3619       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3620       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3621       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3622       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3623       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3624       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3625       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3626       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3627       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3628       ierr = VecDestroy(&x);CHKERRQ(ierr);
3629       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3630     }
3631 
3632     /* adapt sub_schurs computed (if any) */
3633     if (pcbddc->use_deluxe_scaling) {
3634       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3635       if (sub_schurs->S_Ej_all) {
3636         Mat                    S_new,tmat;
3637         ISLocalToGlobalMapping NtoSall;
3638         IS                     is_all_N,is_V,is_V_Sall;
3639         const PetscScalar      *array;
3640         const PetscInt         *idxs_V,*idxs_all;
3641         PetscInt               i,n_V;
3642 
3643         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3644         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3645         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3646         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3647         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3648         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3649         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3650         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3651         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3652         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3653         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3654         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3655         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3656         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3657         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3658         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3659         for (i=0;i<n_V;i++) {
3660           PetscScalar val;
3661           PetscInt    idx;
3662 
3663           idx = idxs_V[i];
3664           val = array[idxs_all[idxs_V[i]]];
3665           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3666         }
3667         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3668         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3669         sub_schurs->S_Ej_all = S_new;
3670         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3671         if (sub_schurs->sum_S_Ej_all) {
3672           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3673           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3674           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3675           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3676           sub_schurs->sum_S_Ej_all = S_new;
3677           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3678         }
3679         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3680         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3681         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3682         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3683         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3684       }
3685     }
3686     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3687   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3688     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3689     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3690   }
3691 
3692   /* set up change of basis context */
3693   if (pcbddc->ChangeOfBasisMatrix) {
3694     PCBDDCChange_ctx change_ctx;
3695 
3696     if (!pcbddc->new_global_mat) {
3697       PetscInt global_size,local_size;
3698 
3699       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3700       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3701       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3702       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3703       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3704       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3705       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3706       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3707       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3708     } else {
3709       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3710       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3711       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3712     }
3713     if (!pcbddc->user_ChangeOfBasisMatrix) {
3714       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3715       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3716     } else {
3717       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3718       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3719     }
3720     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3721     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3722     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3723     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3724   }
3725 
3726   /* add pressure dofs to set of primal nodes for numbering purposes */
3727   for (i=0;i<pcbddc->benign_n;i++) {
3728     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
3729     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
3730     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3731     pcbddc->local_primal_size_cc++;
3732     pcbddc->local_primal_size++;
3733   }
3734 
3735   /* check if a new primal space has been introduced (also take into account benign trick) */
3736   pcbddc->new_primal_space_local = PETSC_TRUE;
3737   if (olocal_primal_size == pcbddc->local_primal_size) {
3738     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3739     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3740     if (!pcbddc->new_primal_space_local) {
3741       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3742       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3743     }
3744   }
3745   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3746   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3747   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3748 
3749   /* flush dbg viewer */
3750   if (pcbddc->dbg_flag) {
3751     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3752   }
3753 
3754   /* free workspace */
3755   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3756   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3757   if (!pcbddc->adaptive_selection) {
3758     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3759     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3760   } else {
3761     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3762                       pcbddc->adaptive_constraints_idxs_ptr,
3763                       pcbddc->adaptive_constraints_data_ptr,
3764                       pcbddc->adaptive_constraints_idxs,
3765                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3766     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3767     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3768   }
3769   PetscFunctionReturn(0);
3770 }
3771 
3772 #undef __FUNCT__
3773 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3774 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3775 {
3776   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3777   PC_IS       *pcis = (PC_IS*)pc->data;
3778   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3779   PetscInt    ierr,i,vertex_size,N;
3780   PetscViewer viewer=pcbddc->dbg_viewer;
3781 
3782   PetscFunctionBegin;
3783   /* Reset previously computed graph */
3784   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3785   /* Init local Graph struct */
3786   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3787   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3788 
3789   /* Check validity of the csr graph passed in by the user */
3790   if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3791     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
3792   }
3793 
3794   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3795   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3796     PetscInt  *xadj,*adjncy;
3797     PetscInt  nvtxs;
3798     PetscBool flg_row=PETSC_FALSE;
3799 
3800     if (pcbddc->use_local_adj) {
3801 
3802       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3803       if (flg_row) {
3804         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3805         pcbddc->computed_rowadj = PETSC_TRUE;
3806       }
3807       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3808     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3809       IS                     is_dummy;
3810       ISLocalToGlobalMapping l2gmap_dummy;
3811       PetscInt               j,sum;
3812       PetscInt               *cxadj,*cadjncy;
3813       const PetscInt         *idxs;
3814       PCBDDCGraph            graph;
3815       PetscBT                is_on_boundary;
3816 
3817       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3818       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3819       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3820       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3821       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3822       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3823       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3824       if (flg_row) {
3825         graph->xadj = xadj;
3826         graph->adjncy = adjncy;
3827       }
3828       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3829       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3830       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3831 
3832       if (pcbddc->dbg_flag) {
3833         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3834         for (i=0;i<graph->ncc;i++) {
3835           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3836         }
3837       }
3838 
3839       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3840       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3841       for (i=0;i<pcis->n_B;i++) {
3842         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3843       }
3844       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3845 
3846       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3847       sum = 0;
3848       for (i=0;i<graph->ncc;i++) {
3849         PetscInt sizecc = 0;
3850         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3851           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3852             sizecc++;
3853           }
3854         }
3855         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3856           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3857             cxadj[graph->queue[j]] = sizecc;
3858           }
3859         }
3860         sum += sizecc*sizecc;
3861       }
3862       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3863       sum = 0;
3864       for (i=0;i<pcis->n;i++) {
3865         PetscInt temp = cxadj[i];
3866         cxadj[i] = sum;
3867         sum += temp;
3868       }
3869       cxadj[pcis->n] = sum;
3870       for (i=0;i<graph->ncc;i++) {
3871         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3872           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3873             PetscInt k,sizecc = 0;
3874             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3875               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3876                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3877                 sizecc++;
3878               }
3879             }
3880           }
3881         }
3882       }
3883       if (sum) {
3884         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3885       } else {
3886         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3887         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3888       }
3889       graph->xadj = 0;
3890       graph->adjncy = 0;
3891       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3892       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3893     }
3894   }
3895   if (pcbddc->dbg_flag) {
3896     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3897   }
3898 
3899   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3900   vertex_size = 1;
3901   if (pcbddc->user_provided_isfordofs) {
3902     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3903       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3904       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3905         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3906         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3907       }
3908       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3909       pcbddc->n_ISForDofs = 0;
3910       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3911     }
3912     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3913     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3914   } else {
3915     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3916       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3917       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3918       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3919         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3920       }
3921     }
3922   }
3923 
3924   /* Setup of Graph */
3925   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3926     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3927   }
3928   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3929     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3930   }
3931   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */
3932     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
3933   }
3934   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
3935 
3936   /* attach info on disconnected subdomains if present */
3937   if (pcbddc->n_local_subs) {
3938     PetscInt *local_subs;
3939 
3940     ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr);
3941     for (i=0;i<pcbddc->n_local_subs;i++) {
3942       const PetscInt *idxs;
3943       PetscInt       nl,j;
3944 
3945       ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr);
3946       ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3947       for (j=0;j<nl;j++) {
3948         local_subs[idxs[j]] = i;
3949       }
3950       ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3951     }
3952     pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
3953     pcbddc->mat_graph->local_subs = local_subs;
3954   }
3955 
3956   /* Graph's connected components analysis */
3957   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3958 
3959   /* print some info to stdout */
3960   if (pcbddc->dbg_flag) {
3961     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3962   }
3963 
3964   /* mark topography has done */
3965   pcbddc->recompute_topography = PETSC_FALSE;
3966   PetscFunctionReturn(0);
3967 }
3968 
3969 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3970 #undef __FUNCT__
3971 #define __FUNCT__ "PCBDDCSubsetNumbering"
3972 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3973 {
3974   PetscSF        sf;
3975   PetscLayout    map;
3976   const PetscInt *idxs;
3977   PetscInt       *leaf_data,*root_data,*gidxs;
3978   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3979   PetscInt       n_n,nlocals,start,first_index;
3980   PetscMPIInt    commsize;
3981   PetscBool      first_found;
3982   PetscErrorCode ierr;
3983 
3984   PetscFunctionBegin;
3985   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3986   if (subset_mult) {
3987     PetscCheckSameComm(subset,1,subset_mult,2);
3988     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3989     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3990   }
3991   /* create workspace layout for computing global indices of subset */
3992   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3993   lbounds[0] = lbounds[1] = 0;
3994   for (i=0;i<n;i++) {
3995     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3996     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3997   }
3998   lbounds[0] = -lbounds[0];
3999   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4000   gbounds[0] = -gbounds[0];
4001   N = gbounds[1] - gbounds[0] + 1;
4002   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
4003   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
4004   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
4005   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
4006   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
4007 
4008   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
4009   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
4010   if (subset_mult) {
4011     const PetscInt* idxs_mult;
4012 
4013     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4014     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
4015     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4016   } else {
4017     for (i=0;i<n;i++) leaf_data[i] = 1;
4018   }
4019   /* local size of new subset */
4020   n_n = 0;
4021   for (i=0;i<n;i++) n_n += leaf_data[i];
4022 
4023   /* global indexes in layout */
4024   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
4025   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
4026   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
4027   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
4028   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
4029   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
4030 
4031   /* reduce from leaves to roots */
4032   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
4033   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4034   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4035 
4036   /* count indexes in local part of layout */
4037   nlocals = 0;
4038   first_index = -1;
4039   first_found = PETSC_FALSE;
4040   for (i=0;i<Nl;i++) {
4041     if (!first_found && root_data[i]) {
4042       first_found = PETSC_TRUE;
4043       first_index = i;
4044     }
4045     nlocals += root_data[i];
4046   }
4047 
4048   /* cumulative of number of indexes and size of subset without holes */
4049 #if defined(PETSC_HAVE_MPI_EXSCAN)
4050   start = 0;
4051   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4052 #else
4053   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4054   start = start-nlocals;
4055 #endif
4056 
4057   if (N_n) { /* compute total size of new subset if requested */
4058     *N_n = start + nlocals;
4059     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
4060     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4061   }
4062 
4063   /* adapt root data with cumulative */
4064   if (first_found) {
4065     PetscInt old_index;
4066 
4067     root_data[first_index] += start;
4068     old_index = first_index;
4069     for (i=first_index+1;i<Nl;i++) {
4070       if (root_data[i]) {
4071         root_data[i] += root_data[old_index];
4072         old_index = i;
4073       }
4074     }
4075   }
4076 
4077   /* from roots to leaves */
4078   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4079   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4080   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
4081 
4082   /* create new IS with global indexes without holes */
4083   if (subset_mult) {
4084     const PetscInt* idxs_mult;
4085     PetscInt        cum;
4086 
4087     cum = 0;
4088     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4089     for (i=0;i<n;i++) {
4090       PetscInt j;
4091       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
4092     }
4093     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4094   } else {
4095     for (i=0;i<n;i++) {
4096       gidxs[i] = leaf_data[i]-1;
4097     }
4098   }
4099   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
4100   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
4101   PetscFunctionReturn(0);
4102 }
4103 
4104 #undef __FUNCT__
4105 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
4106 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
4107 {
4108   PetscInt       i,j;
4109   PetscScalar    *alphas;
4110   PetscErrorCode ierr;
4111 
4112   PetscFunctionBegin;
4113   /* this implements stabilized Gram-Schmidt */
4114   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
4115   for (i=0;i<n;i++) {
4116     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
4117     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
4118     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
4119   }
4120   ierr = PetscFree(alphas);CHKERRQ(ierr);
4121   PetscFunctionReturn(0);
4122 }
4123 
4124 #undef __FUNCT__
4125 #define __FUNCT__ "MatISGetSubassemblingPattern"
4126 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
4127 {
4128   IS             ranks_send_to;
4129   PetscInt       n_neighs,*neighs,*n_shared,**shared;
4130   PetscMPIInt    size,rank,color;
4131   PetscInt       *xadj,*adjncy;
4132   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
4133   PetscInt       i,local_size,threshold=0;
4134   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
4135   PetscSubcomm   subcomm;
4136   PetscErrorCode ierr;
4137 
4138   PetscFunctionBegin;
4139   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
4140   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
4141   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
4142 
4143   /* Get info on mapping */
4144   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
4145   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4146 
4147   /* build local CSR graph of subdomains' connectivity */
4148   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
4149   xadj[0] = 0;
4150   xadj[1] = PetscMax(n_neighs-1,0);
4151   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
4152   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
4153 
4154   if (threshold) {
4155     PetscInt xadj_count = 0;
4156     for (i=1;i<n_neighs;i++) {
4157       if (n_shared[i] > threshold) {
4158         adjncy[xadj_count] = neighs[i];
4159         adjncy_wgt[xadj_count] = n_shared[i];
4160         xadj_count++;
4161       }
4162     }
4163     xadj[1] = xadj_count;
4164   } else {
4165     if (xadj[1]) {
4166       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
4167       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
4168     }
4169   }
4170   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4171   if (use_square) {
4172     for (i=0;i<xadj[1];i++) {
4173       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
4174     }
4175   }
4176   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4177 
4178   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
4179 
4180   /*
4181     Restrict work on active processes only.
4182   */
4183   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
4184   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
4185   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
4186   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
4187   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4188   if (color) {
4189     ierr = PetscFree(xadj);CHKERRQ(ierr);
4190     ierr = PetscFree(adjncy);CHKERRQ(ierr);
4191     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4192   } else {
4193     Mat             subdomain_adj;
4194     IS              new_ranks,new_ranks_contig;
4195     MatPartitioning partitioner;
4196     PetscInt        prank,rstart=0,rend=0;
4197     PetscInt        *is_indices,*oldranks;
4198     PetscBool       aggregate;
4199 
4200     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
4201     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
4202     prank = rank;
4203     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
4204     /*
4205     for (i=0;i<size;i++) {
4206       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
4207     }
4208     */
4209     for (i=0;i<xadj[1];i++) {
4210       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
4211     }
4212     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4213     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
4214     if (aggregate) {
4215       PetscInt    lrows,row,ncols,*cols;
4216       PetscMPIInt nrank;
4217       PetscScalar *vals;
4218 
4219       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
4220       lrows = 0;
4221       if (nrank<redprocs) {
4222         lrows = size/redprocs;
4223         if (nrank<size%redprocs) lrows++;
4224       }
4225       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
4226       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
4227       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4228       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4229       row = nrank;
4230       ncols = xadj[1]-xadj[0];
4231       cols = adjncy;
4232       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
4233       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
4234       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4235       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4236       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4237       ierr = PetscFree(xadj);CHKERRQ(ierr);
4238       ierr = PetscFree(adjncy);CHKERRQ(ierr);
4239       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4240       ierr = PetscFree(vals);CHKERRQ(ierr);
4241     } else {
4242       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
4243     }
4244     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
4245 
4246     /* Partition */
4247     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
4248     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
4249     if (use_vwgt) {
4250       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
4251       v_wgt[0] = local_size;
4252       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
4253     }
4254     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
4255     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
4256     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
4257     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
4258     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
4259 
4260     /* renumber new_ranks to avoid "holes" in new set of processors */
4261     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
4262     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
4263     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4264     if (!redprocs) {
4265       ranks_send_to_idx[0] = oldranks[is_indices[0]];
4266     } else {
4267       PetscInt    idxs[1];
4268       PetscMPIInt tag;
4269       MPI_Request *reqs;
4270 
4271       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
4272       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
4273       for (i=rstart;i<rend;i++) {
4274         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
4275       }
4276       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
4277       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4278       ierr = PetscFree(reqs);CHKERRQ(ierr);
4279       ranks_send_to_idx[0] = oldranks[idxs[0]];
4280     }
4281     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4282     /* clean up */
4283     ierr = PetscFree(oldranks);CHKERRQ(ierr);
4284     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
4285     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
4286     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
4287   }
4288   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4289 
4290   /* assemble parallel IS for sends */
4291   i = 1;
4292   if (color) i=0;
4293   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
4294   /* get back IS */
4295   *is_sends = ranks_send_to;
4296   PetscFunctionReturn(0);
4297 }
4298 
4299 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
4300 
4301 #undef __FUNCT__
4302 #define __FUNCT__ "MatISSubassemble"
4303 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
4304 {
4305   Mat                    local_mat;
4306   IS                     is_sends_internal;
4307   PetscInt               rows,cols,new_local_rows;
4308   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
4309   PetscBool              ismatis,isdense,newisdense,destroy_mat;
4310   ISLocalToGlobalMapping l2gmap;
4311   PetscInt*              l2gmap_indices;
4312   const PetscInt*        is_indices;
4313   MatType                new_local_type;
4314   /* buffers */
4315   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
4316   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
4317   PetscInt               *recv_buffer_idxs_local;
4318   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
4319   /* MPI */
4320   MPI_Comm               comm,comm_n;
4321   PetscSubcomm           subcomm;
4322   PetscMPIInt            n_sends,n_recvs,commsize;
4323   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
4324   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
4325   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
4326   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
4327   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
4328   PetscErrorCode         ierr;
4329 
4330   PetscFunctionBegin;
4331   /* TODO: add missing checks */
4332   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
4333   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
4334   PetscValidLogicalCollectiveEnum(mat,reuse,5);
4335   PetscValidLogicalCollectiveInt(mat,nis,7);
4336   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
4337   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
4338   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4339   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
4340   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
4341   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
4342   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
4343   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
4344     PetscInt mrows,mcols,mnrows,mncols;
4345     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
4346     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
4347     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
4348     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
4349     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
4350     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
4351   }
4352   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
4353   PetscValidLogicalCollectiveInt(mat,bs,0);
4354   /* prepare IS for sending if not provided */
4355   if (!is_sends) {
4356     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
4357     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
4358   } else {
4359     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
4360     is_sends_internal = is_sends;
4361   }
4362 
4363   /* get comm */
4364   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
4365 
4366   /* compute number of sends */
4367   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
4368   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
4369 
4370   /* compute number of receives */
4371   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
4372   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
4373   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
4374   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4375   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
4376   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
4377   ierr = PetscFree(iflags);CHKERRQ(ierr);
4378 
4379   /* restrict comm if requested */
4380   subcomm = 0;
4381   destroy_mat = PETSC_FALSE;
4382   if (restrict_comm) {
4383     PetscMPIInt color,subcommsize;
4384 
4385     color = 0;
4386     if (restrict_full) {
4387       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
4388     } else {
4389       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
4390     }
4391     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
4392     subcommsize = commsize - subcommsize;
4393     /* check if reuse has been requested */
4394     if (reuse == MAT_REUSE_MATRIX) {
4395       if (*mat_n) {
4396         PetscMPIInt subcommsize2;
4397         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
4398         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
4399         comm_n = PetscObjectComm((PetscObject)*mat_n);
4400       } else {
4401         comm_n = PETSC_COMM_SELF;
4402       }
4403     } else { /* MAT_INITIAL_MATRIX */
4404       PetscMPIInt rank;
4405 
4406       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4407       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4408       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4409       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4410       comm_n = PetscSubcommChild(subcomm);
4411     }
4412     /* flag to destroy *mat_n if not significative */
4413     if (color) destroy_mat = PETSC_TRUE;
4414   } else {
4415     comm_n = comm;
4416   }
4417 
4418   /* prepare send/receive buffers */
4419   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4420   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4421   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4422   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4423   if (nis) {
4424     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4425   }
4426 
4427   /* Get data from local matrices */
4428   if (!isdense) {
4429     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4430     /* TODO: See below some guidelines on how to prepare the local buffers */
4431     /*
4432        send_buffer_vals should contain the raw values of the local matrix
4433        send_buffer_idxs should contain:
4434        - MatType_PRIVATE type
4435        - PetscInt        size_of_l2gmap
4436        - PetscInt        global_row_indices[size_of_l2gmap]
4437        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4438     */
4439   } else {
4440     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4441     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4442     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4443     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4444     send_buffer_idxs[1] = i;
4445     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4446     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4447     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4448     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4449     for (i=0;i<n_sends;i++) {
4450       ilengths_vals[is_indices[i]] = len*len;
4451       ilengths_idxs[is_indices[i]] = len+2;
4452     }
4453   }
4454   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4455   /* additional is (if any) */
4456   if (nis) {
4457     PetscMPIInt psum;
4458     PetscInt j;
4459     for (j=0,psum=0;j<nis;j++) {
4460       PetscInt plen;
4461       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4462       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4463       psum += len+1; /* indices + lenght */
4464     }
4465     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4466     for (j=0,psum=0;j<nis;j++) {
4467       PetscInt plen;
4468       const PetscInt *is_array_idxs;
4469       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4470       send_buffer_idxs_is[psum] = plen;
4471       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4472       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4473       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4474       psum += plen+1; /* indices + lenght */
4475     }
4476     for (i=0;i<n_sends;i++) {
4477       ilengths_idxs_is[is_indices[i]] = psum;
4478     }
4479     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4480   }
4481 
4482   buf_size_idxs = 0;
4483   buf_size_vals = 0;
4484   buf_size_idxs_is = 0;
4485   for (i=0;i<n_recvs;i++) {
4486     buf_size_idxs += (PetscInt)olengths_idxs[i];
4487     buf_size_vals += (PetscInt)olengths_vals[i];
4488     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4489   }
4490   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4491   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4492   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4493 
4494   /* get new tags for clean communications */
4495   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4496   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4497   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4498 
4499   /* allocate for requests */
4500   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4501   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4502   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4503   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4504   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4505   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4506 
4507   /* communications */
4508   ptr_idxs = recv_buffer_idxs;
4509   ptr_vals = recv_buffer_vals;
4510   ptr_idxs_is = recv_buffer_idxs_is;
4511   for (i=0;i<n_recvs;i++) {
4512     source_dest = onodes[i];
4513     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4514     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4515     ptr_idxs += olengths_idxs[i];
4516     ptr_vals += olengths_vals[i];
4517     if (nis) {
4518       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4519       ptr_idxs_is += olengths_idxs_is[i];
4520     }
4521   }
4522   for (i=0;i<n_sends;i++) {
4523     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4524     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4525     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4526     if (nis) {
4527       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4528     }
4529   }
4530   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4531   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4532 
4533   /* assemble new l2g map */
4534   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4535   ptr_idxs = recv_buffer_idxs;
4536   new_local_rows = 0;
4537   for (i=0;i<n_recvs;i++) {
4538     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4539     ptr_idxs += olengths_idxs[i];
4540   }
4541   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4542   ptr_idxs = recv_buffer_idxs;
4543   new_local_rows = 0;
4544   for (i=0;i<n_recvs;i++) {
4545     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4546     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4547     ptr_idxs += olengths_idxs[i];
4548   }
4549   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4550   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4551   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4552 
4553   /* infer new local matrix type from received local matrices type */
4554   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4555   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4556   if (n_recvs) {
4557     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4558     ptr_idxs = recv_buffer_idxs;
4559     for (i=0;i<n_recvs;i++) {
4560       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4561         new_local_type_private = MATAIJ_PRIVATE;
4562         break;
4563       }
4564       ptr_idxs += olengths_idxs[i];
4565     }
4566     switch (new_local_type_private) {
4567       case MATDENSE_PRIVATE:
4568         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4569           new_local_type = MATSEQAIJ;
4570           bs = 1;
4571         } else { /* if I receive only 1 dense matrix */
4572           new_local_type = MATSEQDENSE;
4573           bs = 1;
4574         }
4575         break;
4576       case MATAIJ_PRIVATE:
4577         new_local_type = MATSEQAIJ;
4578         bs = 1;
4579         break;
4580       case MATBAIJ_PRIVATE:
4581         new_local_type = MATSEQBAIJ;
4582         break;
4583       case MATSBAIJ_PRIVATE:
4584         new_local_type = MATSEQSBAIJ;
4585         break;
4586       default:
4587         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4588         break;
4589     }
4590   } else { /* by default, new_local_type is seqdense */
4591     new_local_type = MATSEQDENSE;
4592     bs = 1;
4593   }
4594 
4595   /* create MATIS object if needed */
4596   if (reuse == MAT_INITIAL_MATRIX) {
4597     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4598     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4599   } else {
4600     /* it also destroys the local matrices */
4601     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4602   }
4603   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4604   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4605 
4606   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4607 
4608   /* Global to local map of received indices */
4609   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4610   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4611   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4612 
4613   /* restore attributes -> type of incoming data and its size */
4614   buf_size_idxs = 0;
4615   for (i=0;i<n_recvs;i++) {
4616     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4617     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4618     buf_size_idxs += (PetscInt)olengths_idxs[i];
4619   }
4620   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4621 
4622   /* set preallocation */
4623   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4624   if (!newisdense) {
4625     PetscInt *new_local_nnz=0;
4626 
4627     ptr_vals = recv_buffer_vals;
4628     ptr_idxs = recv_buffer_idxs_local;
4629     if (n_recvs) {
4630       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4631     }
4632     for (i=0;i<n_recvs;i++) {
4633       PetscInt j;
4634       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4635         for (j=0;j<*(ptr_idxs+1);j++) {
4636           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4637         }
4638       } else {
4639         /* TODO */
4640       }
4641       ptr_idxs += olengths_idxs[i];
4642     }
4643     if (new_local_nnz) {
4644       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4645       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4646       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4647       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4648       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4649       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4650     } else {
4651       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4652     }
4653     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4654   } else {
4655     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4656   }
4657 
4658   /* set values */
4659   ptr_vals = recv_buffer_vals;
4660   ptr_idxs = recv_buffer_idxs_local;
4661   for (i=0;i<n_recvs;i++) {
4662     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4663       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4664       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4665       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4666       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4667       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4668     } else {
4669       /* TODO */
4670     }
4671     ptr_idxs += olengths_idxs[i];
4672     ptr_vals += olengths_vals[i];
4673   }
4674   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4675   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4676   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4677   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4678   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4679   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4680 
4681 #if 0
4682   if (!restrict_comm) { /* check */
4683     Vec       lvec,rvec;
4684     PetscReal infty_error;
4685 
4686     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4687     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4688     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4689     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4690     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4691     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4692     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4693     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4694     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4695   }
4696 #endif
4697 
4698   /* assemble new additional is (if any) */
4699   if (nis) {
4700     PetscInt **temp_idxs,*count_is,j,psum;
4701 
4702     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4703     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4704     ptr_idxs = recv_buffer_idxs_is;
4705     psum = 0;
4706     for (i=0;i<n_recvs;i++) {
4707       for (j=0;j<nis;j++) {
4708         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4709         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4710         psum += plen;
4711         ptr_idxs += plen+1; /* shift pointer to received data */
4712       }
4713     }
4714     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4715     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4716     for (i=1;i<nis;i++) {
4717       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4718     }
4719     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4720     ptr_idxs = recv_buffer_idxs_is;
4721     for (i=0;i<n_recvs;i++) {
4722       for (j=0;j<nis;j++) {
4723         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4724         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4725         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4726         ptr_idxs += plen+1; /* shift pointer to received data */
4727       }
4728     }
4729     for (i=0;i<nis;i++) {
4730       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4731       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4732       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4733     }
4734     ierr = PetscFree(count_is);CHKERRQ(ierr);
4735     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4736     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4737   }
4738   /* free workspace */
4739   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4740   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4741   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4742   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4743   if (isdense) {
4744     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4745     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4746   } else {
4747     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4748   }
4749   if (nis) {
4750     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4751     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4752   }
4753   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4754   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4755   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4756   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4757   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4758   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4759   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4760   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4761   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4762   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4763   ierr = PetscFree(onodes);CHKERRQ(ierr);
4764   if (nis) {
4765     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4766     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4767     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4768   }
4769   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4770   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4771     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4772     for (i=0;i<nis;i++) {
4773       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4774     }
4775     *mat_n = NULL;
4776   }
4777   PetscFunctionReturn(0);
4778 }
4779 
4780 /* temporary hack into ksp private data structure */
4781 #include <petsc/private/kspimpl.h>
4782 
4783 #undef __FUNCT__
4784 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4785 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4786 {
4787   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4788   PC_IS                  *pcis = (PC_IS*)pc->data;
4789   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4790   MatNullSpace           CoarseNullSpace=NULL;
4791   ISLocalToGlobalMapping coarse_islg;
4792   IS                     coarse_is,*isarray;
4793   PetscInt               i,im_active=-1,active_procs=-1;
4794   PetscInt               nis,nisdofs,nisneu,nisvert;
4795   PC                     pc_temp;
4796   PCType                 coarse_pc_type;
4797   KSPType                coarse_ksp_type;
4798   PetscBool              multilevel_requested,multilevel_allowed;
4799   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4800   Mat                    t_coarse_mat_is;
4801   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4802   PetscMPIInt            all_procs;
4803   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4804   PetscBool              compute_vecs = PETSC_FALSE;
4805   PetscScalar            *array;
4806   PetscErrorCode         ierr;
4807 
4808   PetscFunctionBegin;
4809   /* Assign global numbering to coarse dofs */
4810   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4811     PetscInt ocoarse_size;
4812     compute_vecs = PETSC_TRUE;
4813     ocoarse_size = pcbddc->coarse_size;
4814     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4815     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4816     /* see if we can avoid some work */
4817     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4818       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4819       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4820         PC        pc;
4821         PetscBool isbddc;
4822 
4823         /* temporary workaround since PCBDDC does not have a reset method so far */
4824         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4825         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4826         if (isbddc) {
4827           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4828         }
4829         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4830         coarse_reuse = PETSC_FALSE;
4831       } else { /* we can safely reuse already computed coarse matrix */
4832         coarse_reuse = PETSC_TRUE;
4833       }
4834     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4835       coarse_reuse = PETSC_FALSE;
4836     }
4837     /* reset any subassembling information */
4838     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4839     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4840   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4841     coarse_reuse = PETSC_TRUE;
4842   }
4843 
4844   /* count "active" (i.e. with positive local size) and "void" processes */
4845   im_active = !!(pcis->n);
4846   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4847   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4848   void_procs = all_procs-active_procs;
4849   csin_type_simple = PETSC_TRUE;
4850   redist = PETSC_FALSE;
4851   if (pcbddc->current_level && void_procs) {
4852     csin_ml = PETSC_TRUE;
4853     ncoarse_ml = void_procs;
4854     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4855     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4856       csin_ds = PETSC_TRUE;
4857       ncoarse_ds = pcbddc->redistribute_coarse;
4858       redist = PETSC_TRUE;
4859     } else {
4860       csin_ds = PETSC_TRUE;
4861       ncoarse_ds = active_procs;
4862       redist = PETSC_TRUE;
4863     }
4864   } else {
4865     csin_ml = PETSC_FALSE;
4866     ncoarse_ml = all_procs;
4867     if (void_procs) {
4868       csin_ds = PETSC_TRUE;
4869       ncoarse_ds = void_procs;
4870       csin_type_simple = PETSC_FALSE;
4871     } else {
4872       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4873         csin_ds = PETSC_TRUE;
4874         ncoarse_ds = pcbddc->redistribute_coarse;
4875         redist = PETSC_TRUE;
4876       } else {
4877         csin_ds = PETSC_FALSE;
4878         ncoarse_ds = all_procs;
4879       }
4880     }
4881   }
4882 
4883   /*
4884     test if we can go multilevel: three conditions must be satisfied:
4885     - we have not exceeded the number of levels requested
4886     - we can actually subassemble the active processes
4887     - we can find a suitable number of MPI processes where we can place the subassembled problem
4888   */
4889   multilevel_allowed = PETSC_FALSE;
4890   multilevel_requested = PETSC_FALSE;
4891   if (pcbddc->current_level < pcbddc->max_levels) {
4892     multilevel_requested = PETSC_TRUE;
4893     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4894       multilevel_allowed = PETSC_FALSE;
4895     } else {
4896       multilevel_allowed = PETSC_TRUE;
4897     }
4898   }
4899   /* determine number of process partecipating to coarse solver */
4900   if (multilevel_allowed) {
4901     ncoarse = ncoarse_ml;
4902     csin = csin_ml;
4903     redist = PETSC_FALSE;
4904   } else {
4905     ncoarse = ncoarse_ds;
4906     csin = csin_ds;
4907   }
4908 
4909   /* creates temporary l2gmap and IS for coarse indexes */
4910   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4911   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4912 
4913   /* creates temporary MATIS object for coarse matrix */
4914   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4915   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4916   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4917   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4918   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4919   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4920   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4921   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4922   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4923 
4924   /* compute dofs splitting and neumann boundaries for coarse dofs */
4925   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */
4926     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4927     const PetscInt         *idxs;
4928     ISLocalToGlobalMapping tmap;
4929 
4930     /* create map between primal indices (in local representative ordering) and local primal numbering */
4931     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4932     /* allocate space for temporary storage */
4933     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4934     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4935     /* allocate for IS array */
4936     nisdofs = pcbddc->n_ISForDofsLocal;
4937     nisneu = !!pcbddc->NeumannBoundariesLocal;
4938     nisvert = 0;
4939     if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) {
4940       nisvert = 1;
4941     }
4942     nis = nisdofs + nisneu + nisvert;
4943     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4944     /* dofs splitting */
4945     for (i=0;i<nisdofs;i++) {
4946       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4947       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4948       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4949       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4950       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4951       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4952       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4953       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4954     }
4955     /* neumann boundaries */
4956     if (pcbddc->NeumannBoundariesLocal) {
4957       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4958       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4959       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4960       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4961       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4962       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4963       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4964       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4965     }
4966     /* primal vertices (benign) */
4967     if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) {
4968       ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr);
4969       ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr);
4970       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4971       ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr);
4972       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4973       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr);
4974     }
4975     /* free memory */
4976     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4977     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4978     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4979   } else {
4980     nis = 0;
4981     nisdofs = 0;
4982     nisneu = 0;
4983     nisvert = 0;
4984     isarray = NULL;
4985   }
4986   /* destroy no longer needed map */
4987   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4988 
4989   /* restrict on coarse candidates (if needed) */
4990   coarse_mat_is = NULL;
4991   if (csin) {
4992     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4993       if (redist) {
4994         PetscMPIInt rank;
4995         PetscInt    spc,n_spc_p1,dest[1],destsize;
4996 
4997         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4998         spc = active_procs/ncoarse;
4999         n_spc_p1 = active_procs%ncoarse;
5000         if (im_active) {
5001           destsize = 1;
5002           if (rank > n_spc_p1*(spc+1)-1) {
5003             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
5004           } else {
5005             dest[0] = rank/(spc+1);
5006           }
5007         } else {
5008           destsize = 0;
5009         }
5010         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5011       } else if (csin_type_simple) {
5012         PetscMPIInt rank;
5013         PetscInt    issize,isidx;
5014 
5015         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
5016         if (im_active) {
5017           issize = 1;
5018           isidx = (PetscInt)rank;
5019         } else {
5020           issize = 0;
5021           isidx = -1;
5022         }
5023         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5024       } else { /* get a suitable subassembling pattern from MATIS code */
5025         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5026       }
5027 
5028       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
5029       if (!redist || ncoarse <= void_procs) {
5030         PetscInt ncoarse_cand,tissize,*nisindices;
5031         PetscInt *coarse_candidates;
5032         const PetscInt* tisindices;
5033 
5034         /* get coarse candidates' ranks in pc communicator */
5035         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
5036         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5037         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
5038           if (!coarse_candidates[i]) {
5039             coarse_candidates[ncoarse_cand++]=i;
5040           }
5041         }
5042         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
5043 
5044 
5045         if (pcbddc->dbg_flag) {
5046           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5047           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
5048           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5049           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
5050           for (i=0;i<ncoarse_cand;i++) {
5051             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
5052           }
5053           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
5054           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5055         }
5056         /* shift the pattern on coarse candidates */
5057         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
5058         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5059         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
5060         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
5061         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5062         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
5063         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
5064       }
5065       if (pcbddc->dbg_flag) {
5066         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5067         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
5068         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5069         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5070       }
5071     }
5072     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
5073     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
5074       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5075     } else { /* this is the last level, so use just receiving processes in subcomm */
5076       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5077     }
5078   } else {
5079     if (pcbddc->dbg_flag) {
5080       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5081       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
5082       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5083     }
5084     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
5085     coarse_mat_is = t_coarse_mat_is;
5086   }
5087 
5088   /* create local to global scatters for coarse problem */
5089   if (compute_vecs) {
5090     PetscInt lrows;
5091     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
5092     if (coarse_mat_is) {
5093       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
5094     } else {
5095       lrows = 0;
5096     }
5097     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
5098     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
5099     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
5100     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5101     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5102   }
5103   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
5104   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
5105 
5106   /* set defaults for coarse KSP and PC */
5107   if (multilevel_allowed) {
5108     coarse_ksp_type = KSPRICHARDSON;
5109     coarse_pc_type = PCBDDC;
5110   } else {
5111     coarse_ksp_type = KSPPREONLY;
5112     coarse_pc_type = PCREDUNDANT;
5113   }
5114 
5115   /* print some info if requested */
5116   if (pcbddc->dbg_flag) {
5117     if (!multilevel_allowed) {
5118       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5119       if (multilevel_requested) {
5120         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5121       } else if (pcbddc->max_levels) {
5122         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
5123       }
5124       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5125     }
5126   }
5127 
5128   /* create the coarse KSP object only once with defaults */
5129   if (coarse_mat_is) {
5130     MatReuse coarse_mat_reuse;
5131     PetscViewer dbg_viewer = NULL;
5132     if (pcbddc->dbg_flag) {
5133       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
5134       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5135     }
5136     if (!pcbddc->coarse_ksp) {
5137       char prefix[256],str_level[16];
5138       size_t len;
5139       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
5140       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5141       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
5142       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
5143       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
5144       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
5145       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
5146       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5147       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5148       /* prefix */
5149       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
5150       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
5151       if (!pcbddc->current_level) {
5152         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
5153         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
5154       } else {
5155         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
5156         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5157         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5158         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
5159         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
5160         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
5161       }
5162       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
5163       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5164       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
5165       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5166       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
5167       /* allow user customization */
5168       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
5169     }
5170     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5171     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5172     if (nisdofs) {
5173       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
5174       for (i=0;i<nisdofs;i++) {
5175         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
5176       }
5177     }
5178     if (nisneu) {
5179       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
5180       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
5181     }
5182     if (nisvert) {
5183       ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr);
5184       ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr);
5185     }
5186 
5187     /* get some info after set from options */
5188     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
5189     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
5190     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
5191     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
5192       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5193       isbddc = PETSC_FALSE;
5194     }
5195     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
5196     if (isredundant) {
5197       KSP inner_ksp;
5198       PC  inner_pc;
5199       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
5200       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
5201       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
5202     }
5203 
5204     /* assemble coarse matrix */
5205     if (coarse_reuse) {
5206       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
5207       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
5208       coarse_mat_reuse = MAT_REUSE_MATRIX;
5209     } else {
5210       coarse_mat_reuse = MAT_INITIAL_MATRIX;
5211     }
5212     if (isbddc || isnn) {
5213       if (pcbddc->coarsening_ratio > 1) {
5214         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
5215           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
5216           if (pcbddc->dbg_flag) {
5217             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5218             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
5219             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
5220             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5221           }
5222         }
5223         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
5224       } else {
5225         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
5226         coarse_mat = coarse_mat_is;
5227       }
5228     } else {
5229       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
5230     }
5231     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
5232 
5233     /* propagate symmetry info of coarse matrix */
5234     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
5235     if (pc->pmat->symmetric_set) {
5236       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
5237     }
5238     if (pc->pmat->hermitian_set) {
5239       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
5240     }
5241     if (pc->pmat->spd_set) {
5242       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
5243     }
5244     /* set operators */
5245     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5246     if (pcbddc->dbg_flag) {
5247       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5248     }
5249   } else { /* processes non partecipating to coarse solver (if any) */
5250     coarse_mat = 0;
5251   }
5252   ierr = PetscFree(isarray);CHKERRQ(ierr);
5253 #if 0
5254   {
5255     PetscViewer viewer;
5256     char filename[256];
5257     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
5258     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
5259     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
5260     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
5261     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
5262   }
5263 #endif
5264 
5265   /* Compute coarse null space (special handling by BDDC only) */
5266 #if 0
5267   if (pcbddc->NullSpace) {
5268     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
5269   }
5270 #endif
5271   /* hack */
5272   if (pcbddc->coarse_ksp) {
5273     Vec crhs,csol;
5274 
5275     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
5276     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
5277     if (!csol) {
5278       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
5279     }
5280     if (!crhs) {
5281       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
5282     }
5283   }
5284 
5285   /* compute null space for coarse solver if the benign trick has been requested */
5286   if (pcbddc->benign_null) {
5287 
5288     ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr);
5289     for (i=0;i<pcbddc->benign_n;i++) {
5290       ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr);
5291     }
5292     ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
5293     ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
5294     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5295     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5296     if (coarse_mat) {
5297       Vec         nullv;
5298       PetscScalar *array,*array2;
5299       PetscInt    nl;
5300 
5301       ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr);
5302       ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr);
5303       ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5304       ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr);
5305       ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr);
5306       ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr);
5307       ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5308       ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr);
5309       ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr);
5310       ierr = VecDestroy(&nullv);CHKERRQ(ierr);
5311     }
5312   }
5313 
5314   if (pcbddc->coarse_ksp) {
5315     PetscBool ispreonly;
5316 
5317     if (CoarseNullSpace) {
5318       PetscBool isnull;
5319       ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr);
5320       if (isnull) {
5321         if (isbddc && !pcbddc->benign_saddle_point) {
5322           ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
5323         } else {
5324           ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
5325         }
5326       } else {
5327         ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5328       }
5329     }
5330     /* setup coarse ksp */
5331     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
5332     /* Check coarse problem if in debug mode or if solving with an iterative method */
5333     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
5334     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
5335       KSP       check_ksp;
5336       KSPType   check_ksp_type;
5337       PC        check_pc;
5338       Vec       check_vec,coarse_vec;
5339       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
5340       PetscInt  its;
5341       PetscBool compute_eigs;
5342       PetscReal *eigs_r,*eigs_c;
5343       PetscInt  neigs;
5344       const char *prefix;
5345 
5346       /* Create ksp object suitable for estimation of extreme eigenvalues */
5347       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
5348       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5349       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5350       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
5351       if (ispreonly) {
5352         check_ksp_type = KSPPREONLY;
5353         compute_eigs = PETSC_FALSE;
5354       } else {
5355         check_ksp_type = KSPGMRES;
5356         compute_eigs = PETSC_TRUE;
5357       }
5358       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
5359       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
5360       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
5361       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
5362       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
5363       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
5364       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
5365       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
5366       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
5367       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
5368       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
5369       /* create random vec */
5370       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
5371       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
5372       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
5373       if (CoarseNullSpace) {
5374         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
5375       }
5376       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5377       /* solve coarse problem */
5378       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
5379       if (CoarseNullSpace) {
5380         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
5381       }
5382       /* set eigenvalue estimation if preonly has not been requested */
5383       if (compute_eigs) {
5384         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
5385         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
5386         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
5387         lambda_max = eigs_r[neigs-1];
5388         lambda_min = eigs_r[0];
5389         if (pcbddc->use_coarse_estimates) {
5390           if (lambda_max>lambda_min) {
5391             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
5392             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
5393           }
5394         }
5395       }
5396 
5397       /* check coarse problem residual error */
5398       if (pcbddc->dbg_flag) {
5399         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
5400         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5401         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
5402         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
5403         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5404         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
5405         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
5406         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
5407         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
5408         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
5409         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
5410         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
5411         if (CoarseNullSpace) {
5412           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr);
5413         }
5414         if (compute_eigs) {
5415           PetscReal lambda_max_s,lambda_min_s;
5416           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
5417           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
5418           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
5419           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
5420           for (i=0;i<neigs;i++) {
5421             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
5422           }
5423         }
5424         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5425         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5426       }
5427       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
5428       if (compute_eigs) {
5429         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
5430         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
5431       }
5432     }
5433   }
5434   /* print additional info */
5435   if (pcbddc->dbg_flag) {
5436     /* waits until all processes reaches this point */
5437     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
5438     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
5439     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5440   }
5441 
5442   /* free memory */
5443   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5444   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
5445   PetscFunctionReturn(0);
5446 }
5447 
5448 #undef __FUNCT__
5449 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
5450 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
5451 {
5452   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5453   PC_IS*         pcis = (PC_IS*)pc->data;
5454   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5455   IS             subset,subset_mult,subset_n;
5456   PetscInt       local_size,coarse_size=0;
5457   PetscInt       *local_primal_indices=NULL;
5458   const PetscInt *t_local_primal_indices;
5459   PetscErrorCode ierr;
5460 
5461   PetscFunctionBegin;
5462   /* Compute global number of coarse dofs */
5463   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5464     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5465   }
5466   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5467   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5468   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5469   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5470   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5471   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5472   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5473   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5474   if (local_size != pcbddc->local_primal_size) {
5475     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5476   }
5477   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5478   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5479   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5480   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5481   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5482 
5483   /* check numbering */
5484   if (pcbddc->dbg_flag) {
5485     PetscScalar coarsesum,*array,*array2;
5486     PetscInt    i;
5487     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5488 
5489     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5490     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5491     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5492     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
5493     /* counter */
5494     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5495     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5496     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5497     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5498     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5499     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5500     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5501     for (i=0;i<pcbddc->local_primal_size;i++) {
5502       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5503     }
5504     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5505     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5506     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5507     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5508     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5509     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5510     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5511     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5512     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5513     for (i=0;i<pcis->n;i++) {
5514       if (array[i] != 0.0 && array[i] != array2[i]) {
5515         PetscInt owned = (PetscInt)PetscRealPart(array[i]);
5516         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
5517         set_error = PETSC_TRUE;
5518         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5519       }
5520     }
5521     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5522     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5523     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5524     for (i=0;i<pcis->n;i++) {
5525       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5526     }
5527     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5528     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5529     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5530     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5531     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5532     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5533     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5534       PetscInt *gidxs;
5535 
5536       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5537       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5538       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5539       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5540       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5541       for (i=0;i<pcbddc->local_primal_size;i++) {
5542         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5543       }
5544       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5545       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5546     }
5547     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5548     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
5549     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5550   }
5551   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5552   /* get back data */
5553   *coarse_size_n = coarse_size;
5554   *local_primal_indices_n = local_primal_indices;
5555   PetscFunctionReturn(0);
5556 }
5557 
5558 #undef __FUNCT__
5559 #define __FUNCT__ "PCBDDCGlobalToLocal"
5560 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5561 {
5562   IS             localis_t;
5563   PetscInt       i,lsize,*idxs,n;
5564   PetscScalar    *vals;
5565   PetscErrorCode ierr;
5566 
5567   PetscFunctionBegin;
5568   /* get indices in local ordering exploiting local to global map */
5569   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5570   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5571   for (i=0;i<lsize;i++) vals[i] = 1.0;
5572   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5573   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5574   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5575   if (idxs) { /* multilevel guard */
5576     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5577   }
5578   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5579   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5580   ierr = PetscFree(vals);CHKERRQ(ierr);
5581   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5582   /* now compute set in local ordering */
5583   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5584   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5585   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5586   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5587   for (i=0,lsize=0;i<n;i++) {
5588     if (PetscRealPart(vals[i]) > 0.5) {
5589       lsize++;
5590     }
5591   }
5592   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5593   for (i=0,lsize=0;i<n;i++) {
5594     if (PetscRealPart(vals[i]) > 0.5) {
5595       idxs[lsize++] = i;
5596     }
5597   }
5598   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5599   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5600   *localis = localis_t;
5601   PetscFunctionReturn(0);
5602 }
5603 
5604 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5605 #undef __FUNCT__
5606 #define __FUNCT__ "PCBDDCMatMult_Private"
5607 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5608 {
5609   PCBDDCChange_ctx change_ctx;
5610   PetscErrorCode   ierr;
5611 
5612   PetscFunctionBegin;
5613   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5614   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5615   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5616   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5617   PetscFunctionReturn(0);
5618 }
5619 
5620 #undef __FUNCT__
5621 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5622 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5623 {
5624   PCBDDCChange_ctx change_ctx;
5625   PetscErrorCode   ierr;
5626 
5627   PetscFunctionBegin;
5628   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5629   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5630   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5631   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5632   PetscFunctionReturn(0);
5633 }
5634 
5635 #undef __FUNCT__
5636 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5637 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5638 {
5639   PC_IS               *pcis=(PC_IS*)pc->data;
5640   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5641   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5642   Mat                 S_j;
5643   PetscInt            *used_xadj,*used_adjncy;
5644   PetscBool           free_used_adj;
5645   PetscErrorCode      ierr;
5646 
5647   PetscFunctionBegin;
5648   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5649   free_used_adj = PETSC_FALSE;
5650   if (pcbddc->sub_schurs_layers == -1) {
5651     used_xadj = NULL;
5652     used_adjncy = NULL;
5653   } else {
5654     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5655       used_xadj = pcbddc->mat_graph->xadj;
5656       used_adjncy = pcbddc->mat_graph->adjncy;
5657     } else if (pcbddc->computed_rowadj) {
5658       used_xadj = pcbddc->mat_graph->xadj;
5659       used_adjncy = pcbddc->mat_graph->adjncy;
5660     } else {
5661       PetscBool      flg_row=PETSC_FALSE;
5662       const PetscInt *xadj,*adjncy;
5663       PetscInt       nvtxs;
5664 
5665       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5666       if (flg_row) {
5667         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5668         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5669         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5670         free_used_adj = PETSC_TRUE;
5671       } else {
5672         pcbddc->sub_schurs_layers = -1;
5673         used_xadj = NULL;
5674         used_adjncy = NULL;
5675       }
5676       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5677     }
5678   }
5679 
5680   /* setup sub_schurs data */
5681   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5682   if (!sub_schurs->use_mumps) {
5683     /* pcbddc->ksp_D up to date only if not using MUMPS */
5684     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5685     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5686   } else {
5687     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5688     PetscBool isseqaij;
5689     if (!pcbddc->use_vertices && reuse_solvers) {
5690       PetscInt n_vertices;
5691 
5692       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5693       reuse_solvers = (PetscBool)!n_vertices;
5694     }
5695     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5696     if (!isseqaij) {
5697       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5698       if (matis->A == pcbddc->local_mat) {
5699         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5700         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5701       } else {
5702         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5703       }
5704     }
5705     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5706   }
5707   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5708 
5709   /* free adjacency */
5710   if (free_used_adj) {
5711     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5712   }
5713   PetscFunctionReturn(0);
5714 }
5715 
5716 #undef __FUNCT__
5717 #define __FUNCT__ "PCBDDCInitSubSchurs"
5718 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5719 {
5720   PC_IS               *pcis=(PC_IS*)pc->data;
5721   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5722   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5723   PCBDDCGraph         graph;
5724   PetscErrorCode      ierr;
5725 
5726   PetscFunctionBegin;
5727   /* attach interface graph for determining subsets */
5728   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5729     IS       verticesIS,verticescomm;
5730     PetscInt vsize,*idxs;
5731 
5732     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5733     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5734     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5735     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5736     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5737     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5738     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5739     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5740     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5741     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5742     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5743 /*
5744     if (pcbddc->dbg_flag) {
5745       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5746     }
5747 */
5748   } else {
5749     graph = pcbddc->mat_graph;
5750   }
5751 
5752   /* sub_schurs init */
5753   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5754 
5755   /* free graph struct */
5756   if (pcbddc->sub_schurs_rebuild) {
5757     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5758   }
5759   PetscFunctionReturn(0);
5760 }
5761 
5762 #undef __FUNCT__
5763 #define __FUNCT__ "PCBDDCCheckOperator"
5764 PetscErrorCode PCBDDCCheckOperator(PC pc)
5765 {
5766   PC_IS               *pcis=(PC_IS*)pc->data;
5767   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5768   PetscErrorCode      ierr;
5769 
5770   PetscFunctionBegin;
5771   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5772     IS             zerodiag = NULL;
5773     Mat            S_j,B0_B=NULL;
5774     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5775     PetscScalar    *p0_check,*array,*array2;
5776     PetscReal      norm;
5777     PetscInt       i;
5778 
5779     /* B0 and B0_B */
5780     if (zerodiag) {
5781       IS       dummy;
5782 
5783       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
5784       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5785       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5786       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5787     }
5788     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5789     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5790     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5791     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5792     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5793     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5794     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5795     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5796     /* S_j */
5797     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5798     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5799 
5800     /* mimic vector in \widetilde{W}_\Gamma */
5801     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5802     /* continuous in primal space */
5803     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5804     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5805     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5806     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5807     ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr);
5808     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
5809     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5810     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5811     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5812     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5813     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5814     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5815     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5816     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5817 
5818     /* assemble rhs for coarse problem */
5819     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5820     /* local with Schur */
5821     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5822     if (zerodiag) {
5823       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5824       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
5825       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5826       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5827     }
5828     /* sum on primal nodes the local contributions */
5829     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5830     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5831     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5832     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5833     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5834     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5835     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5836     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5837     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5838     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5839     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5840     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5841     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5842     /* scale primal nodes (BDDC sums contibutions) */
5843     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5844     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5845     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5846     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5847     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5848     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5849     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5850     /* global: \widetilde{B0}_B w_\Gamma */
5851     if (zerodiag) {
5852       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5853       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5854       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
5855       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5856     }
5857     /* BDDC */
5858     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5859     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5860 
5861     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5862     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5863     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5864     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5865     for (i=0;i<pcbddc->benign_n;i++) {
5866       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
5867     }
5868     ierr = PetscFree(p0_check);CHKERRQ(ierr);
5869     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5870     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5871     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5872     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5873     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5874   }
5875   PetscFunctionReturn(0);
5876 }
5877