xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision ca92afb2eb2a7a3e8c6d08bb1a5d2ee5ff3ddd95)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/ksp/pc/impls/bddc/bddc.h>
3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
8 
9 /* TODO: add reuse flag */
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqAIJCompress"
12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
13 {
14   Mat            Bt;
15   PetscScalar    *a,*bdata;
16   const PetscInt *ii,*ij;
17   PetscInt       m,n,i,nnz,*bii,*bij;
18   PetscBool      flg_row;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr);
23   ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
24   ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr);
25   nnz = n;
26   for (i=0;i<ii[n];i++) {
27     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
28   }
29   ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr);
30   ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr);
31   ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr);
32   nnz = 0;
33   bii[0] = 0;
34   for (i=0;i<n;i++) {
35     PetscInt j;
36     for (j=ii[i];j<ii[i+1];j++) {
37       PetscScalar entry = a[j];
38       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) {
39         bij[nnz] = ij[j];
40         bdata[nnz] = entry;
41         nnz++;
42       }
43     }
44     bii[i+1] = nnz;
45   }
46   ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr);
47   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr);
48   ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
49   {
50     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
51     b->free_a = PETSC_TRUE;
52     b->free_ij = PETSC_TRUE;
53   }
54   *B = Bt;
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDetectDisconnectedComponents"
60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[])
61 {
62   Mat                    B;
63   IS                     is_dummy,*cc_n;
64   ISLocalToGlobalMapping l2gmap_dummy;
65   PCBDDCGraph            graph;
66   PetscInt               i,n;
67   PetscInt               *xadj,*adjncy;
68   PetscInt               *xadj_filtered,*adjncy_filtered;
69   PetscBool              flg_row,isseqaij;
70   PetscErrorCode         ierr;
71 
72   PetscFunctionBegin;
73   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
74   if (!isseqaij && filter) {
75     PetscBool isseqdense;
76 
77     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr);
78     if (!isseqdense) {
79       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
80     } else { /* TODO: rectangular case and LDA */
81       PetscScalar *array;
82       PetscReal   chop=1.e-6;
83 
84       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
85       ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr);
86       ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr);
87       for (i=0;i<n;i++) {
88         PetscInt j;
89         for (j=i+1;j<n;j++) {
90           PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
91           if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
92           if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
93         }
94       }
95       ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr);
96       ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
97     }
98   } else {
99     B = A;
100   }
101   ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
102 
103   /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
104   if (filter) {
105     PetscScalar *data;
106     PetscInt    j,cum;
107 
108     ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr);
109     ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr);
110     cum = 0;
111     for (i=0;i<n;i++) {
112       PetscInt t;
113 
114       for (j=xadj[i];j<xadj[i+1];j++) {
115         if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
116           continue;
117         }
118         adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
119       }
120       t = xadj_filtered[i];
121       xadj_filtered[i] = cum;
122       cum += t;
123     }
124     ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr);
125   } else {
126     xadj_filtered = NULL;
127     adjncy_filtered = NULL;
128   }
129 
130   /* compute local connected components using PCBDDCGraph */
131   ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr);
132   ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
133   ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
134   ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
135   ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr);
136   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
137   if (xadj_filtered) {
138     graph->xadj = xadj_filtered;
139     graph->adjncy = adjncy_filtered;
140   } else {
141     graph->xadj = xadj;
142     graph->adjncy = adjncy;
143   }
144   ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
145   ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
146   /* partial clean up */
147   ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr);
148   ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
149   if (A != B) {
150     ierr = MatDestroy(&B);CHKERRQ(ierr);
151   }
152 
153   /* get back data */
154   if (ncc) *ncc = graph->ncc;
155   if (cc) {
156     ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr);
157     for (i=0;i<graph->ncc;i++) {
158       ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr);
159     }
160     *cc = cc_n;
161   }
162   /* clean up graph */
163   graph->xadj = 0;
164   graph->adjncy = 0;
165   ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "PCBDDCBenignCheck"
171 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
172 {
173   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
174   PC_IS*         pcis = (PC_IS*)(pc->data);
175   IS             dirIS = NULL;
176   PetscInt       i;
177   PetscErrorCode ierr;
178 
179   PetscFunctionBegin;
180   ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
181   if (zerodiag) {
182     Mat            A;
183     Vec            vec3_N;
184     PetscScalar    *vals;
185     const PetscInt *idxs;
186     PetscInt       nz;
187 
188     /* p0 */
189     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
190     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
191     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
192     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
193     for (i=0;i<nz;i++) vals[i] = 1.;
194     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
195     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
196     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
197     /* v_I */
198     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
199     for (i=0;i<nz;i++) vals[i] = 0.;
200     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
201     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
202     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
203     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
204     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
205     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
206     if (dirIS) {
207       PetscInt n;
208 
209       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
210       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
211       for (i=0;i<n;i++) vals[i] = 0.;
212       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
213       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
214     }
215     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
216     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
217     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
218     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
219     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
220     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
221     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
222     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) {
223       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
224     }
225     ierr = PetscFree(vals);CHKERRQ(ierr);
226     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
227   }
228   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
229 
230   /* check PCBDDCBenignGetOrSetP0 */
231   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
232   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
233   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
234   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
235   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
236   for (i=0;i<pcbddc->benign_n;i++) {
237     if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) {
238       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr);
239     }
240   }
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
246 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
247 {
248   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
249   IS             pressures,zerodiag,*zerodiag_subs;
250   PetscInt       nz,n;
251   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag;
252   PetscErrorCode ierr;
253 
254   PetscFunctionBegin;
255   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
256   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
257   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
258   /* if a local info on dofs is present, assumes the last field is represented by "pressures"
259      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
260      Checks if all the pressure dofs in each subdomain have a zero diagonal
261      If not, a change of basis on pressures is not needed
262      since the local Schur complements are SPD
263   */
264   has_null_pressures = PETSC_TRUE;
265   have_null = PETSC_TRUE;
266   if (pcbddc->n_ISForDofsLocal) {
267     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
268 
269     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
270     ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr);
271     ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
272     ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr);
273     ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
274     ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr);
275     if (!sorted) {
276       ierr = ISSort(pressures);CHKERRQ(ierr);
277     }
278   } else {
279     pressures = NULL;
280   }
281   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
282   ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr);
283   /* TODO: add check for shared dofs and raise error */
284   ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr);
285   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
286   if (!sorted) {
287     ierr = ISSort(zerodiag);CHKERRQ(ierr);
288   }
289   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
290   if (!nz) {
291     if (n) have_null = PETSC_FALSE;
292     has_null_pressures = PETSC_FALSE;
293     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
294   }
295   recompute_zerodiag = PETSC_FALSE;
296   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
297   zerodiag_subs = NULL;
298   pcbddc->benign_n = 0;
299   if (has_null_pressures) {
300     IS       *subs;
301     PetscInt nsubs,i;
302 
303     subs = pcbddc->local_subs;
304     nsubs = pcbddc->n_local_subs;
305     if (nsubs > 1) {
306       ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr);
307       for (i=0;i<nsubs;i++) {
308         ISLocalToGlobalMapping l2g;
309         IS                     t_zerodiag_subs;
310         PetscInt               nl;
311 
312         ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr);
313         ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr);
314         ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr);
315         if (nl) {
316           PetscBool valid = PETSC_TRUE;
317 
318           if (pressures) {
319             IS t_pressure_subs;
320             ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr);
321             ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr);
322             ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr);
323           }
324           if (valid) {
325             ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr);
326             pcbddc->benign_n++;
327           } else {
328             recompute_zerodiag = PETSC_TRUE;
329           }
330         }
331         ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr);
332         ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr);
333       }
334     } else { /* there's just one subdomain (or zero if they have not been detected */
335       PetscBool valid = PETSC_TRUE;
336       if (pressures) {
337         ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr);
338       }
339       if (valid) {
340         pcbddc->benign_n = 1;
341         ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr);
342         ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr);
343         zerodiag_subs[0] = zerodiag;
344       }
345     }
346   }
347 
348   if (!pcbddc->benign_n) {
349     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
350     recompute_zerodiag = PETSC_FALSE;
351     has_null_pressures = PETSC_FALSE;
352     have_null = PETSC_FALSE;
353   }
354 
355   /* final check for null pressures */
356   if (zerodiag && pressures) {
357     PetscInt nz,np;
358     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
359     ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr);
360     if (nz != np) have_null = PETSC_FALSE;
361   }
362 
363   if (recompute_zerodiag) {
364     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
365     if (pcbddc->benign_n == 1) {
366       ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr);
367       zerodiag = zerodiag_subs[0];
368     } else {
369       PetscInt i,nzn,*new_idxs;
370 
371       nzn = 0;
372       for (i=0;i<pcbddc->benign_n;i++) {
373         PetscInt ns;
374         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
375         nzn += ns;
376       }
377       ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr);
378       nzn = 0;
379       for (i=0;i<pcbddc->benign_n;i++) {
380         PetscInt ns,*idxs;
381         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
382         ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
383         ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr);
384         ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
385         nzn += ns;
386       }
387       ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr);
388       ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr);
389     }
390     have_null = PETSC_FALSE;
391   }
392 
393   if (has_null_pressures) {
394     IS             zerodiagc;
395     const PetscInt *idxs,*idxsc;
396     PetscInt       i,s,*nnz;
397 
398     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
399     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
400     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
401     /* local change of basis for pressures */
402     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
403     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr);
404     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
405     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
406     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
407     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
408     for (i=0;i<pcbddc->benign_n;i++) {
409       PetscInt nzs,j;
410 
411       ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr);
412       ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
413       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
414       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
415       ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
416     }
417     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
418     ierr = PetscFree(nnz);CHKERRQ(ierr);
419     /* set identity on velocities */
420     for (i=0;i<n-nz;i++) {
421       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
422     }
423     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
424     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
425     ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
426     ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr);
427     /* set change on pressures */
428     for (s=0;s<pcbddc->benign_n;s++) {
429       PetscScalar *array;
430       PetscInt    nzs;
431 
432       ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr);
433       ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
434       for (i=0;i<nzs-1;i++) {
435         PetscScalar vals[2];
436         PetscInt    cols[2];
437 
438         cols[0] = idxs[i];
439         cols[1] = idxs[nzs-1];
440         vals[0] = 1.;
441         vals[1] = 1.;
442         ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
443       }
444       ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr);
445       for (i=0;i<nzs-1;i++) array[i] = -1.;
446       array[nzs-1] = 1.;
447       ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
448       /* store local idxs for p0 */
449       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
450       ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
451       ierr = PetscFree(array);CHKERRQ(ierr);
452     }
453     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
454     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
455     /* TODO: need optimization? */
456     ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr);
457     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
458     ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr);
459     ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
460     /* store global idxs for p0 */
461     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
462   }
463   pcbddc->benign_zerodiag_subs = zerodiag_subs;
464   ierr = ISDestroy(&pressures);CHKERRQ(ierr);
465 
466   /* determines if the coarse solver will be singular or not */
467   ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
468   *zerodiaglocal = zerodiag;
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
474 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
475 {
476   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
477   PetscErrorCode ierr;
478 
479   PetscFunctionBegin;
480   if (!pcbddc->benign_sf) {
481     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
482     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
483   }
484   if (get) { /* use SF to get values */
485     PetscScalar *array;
486 
487     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
488     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
489     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
490     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
491   } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */
492     ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
493     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
494     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
495   }
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNCT__
500 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
501 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
502 {
503   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
504   PetscErrorCode ierr;
505 
506   PetscFunctionBegin;
507   /* TODO: add error checking
508     - avoid nested pop (or push) calls.
509     - cannot push before pop.
510     - cannot call this if pcbddc->local_mat is NULL
511   */
512   if (!pcbddc->benign_n) {
513     PetscFunctionReturn(0);
514   }
515   if (pop) {
516     IS       is_p0;
517     MatReuse reuse;
518 
519     /* extract B_0 */
520     reuse = MAT_INITIAL_MATRIX;
521     if (pcbddc->benign_B0) {
522       reuse = MAT_REUSE_MATRIX;
523     }
524     ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr);
525     ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr);
526     /* remove rows and cols from local problem */
527     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
528     ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
529     ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr);
530     ierr = ISDestroy(&is_p0);CHKERRQ(ierr);
531   } else { /* push */
532     PetscInt i;
533 
534     for (i=0;i<pcbddc->benign_n;i++) {
535       PetscScalar *B0_vals;
536       PetscInt    *B0_cols,B0_ncol;
537 
538       ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
539       ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
540       ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
541       ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr);
542       ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
543     }
544     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546   }
547   PetscFunctionReturn(0);
548 }
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "PCBDDCAdaptiveSelection"
552 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
553 {
554   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
555   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
556   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
557   PetscBLASInt    *B_iwork,*B_ifail;
558   PetscScalar     *work,lwork;
559   PetscScalar     *St,*S,*eigv;
560   PetscScalar     *Sarray,*Starray;
561   PetscReal       *eigs,thresh;
562   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
563   PetscBool       allocated_S_St;
564 #if defined(PETSC_USE_COMPLEX)
565   PetscReal       *rwork;
566 #endif
567   PetscErrorCode  ierr;
568 
569   PetscFunctionBegin;
570   if (!sub_schurs->use_mumps) {
571     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
572   }
573 
574   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
575     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
576   }
577 
578   if (pcbddc->dbg_flag) {
579     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
580     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
581     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
582     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
583   }
584 
585   if (pcbddc->dbg_flag) {
586     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
587   }
588 
589   /* max size of subsets */
590   mss = 0;
591   for (i=0;i<sub_schurs->n_subs;i++) {
592     PetscInt subset_size;
593 
594     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
595     mss = PetscMax(mss,subset_size);
596   }
597 
598   /* min/max and threshold */
599   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
600   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
601   nmax = PetscMax(nmin,nmax);
602   allocated_S_St = PETSC_FALSE;
603   if (nmin) {
604     allocated_S_St = PETSC_TRUE;
605   }
606 
607   /* allocate lapack workspace */
608   cum = cum2 = 0;
609   maxneigs = 0;
610   for (i=0;i<sub_schurs->n_subs;i++) {
611     PetscInt n,subset_size;
612 
613     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
614     n = PetscMin(subset_size,nmax);
615     cum += subset_size;
616     cum2 += subset_size*n;
617     maxneigs = PetscMax(maxneigs,n);
618   }
619   if (mss) {
620     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
621       PetscBLASInt B_itype = 1;
622       PetscBLASInt B_N = mss;
623       PetscReal    zero = 0.0;
624       PetscReal    eps = 0.0; /* dlamch? */
625 
626       B_lwork = -1;
627       S = NULL;
628       St = NULL;
629       eigs = NULL;
630       eigv = NULL;
631       B_iwork = NULL;
632       B_ifail = NULL;
633 #if defined(PETSC_USE_COMPLEX)
634       rwork = NULL;
635 #endif
636       thresh = 1.0;
637       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
638 #if defined(PETSC_USE_COMPLEX)
639       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
640 #else
641       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
642 #endif
643       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
644       ierr = PetscFPTrapPop();CHKERRQ(ierr);
645     } else {
646         /* TODO */
647     }
648   } else {
649     lwork = 0;
650   }
651 
652   nv = 0;
653   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
654     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
655   }
656   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
657   if (allocated_S_St) {
658     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
659   }
660   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
661 #if defined(PETSC_USE_COMPLEX)
662   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
663 #endif
664   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
665                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
666                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
667                       nv+cum,&pcbddc->adaptive_constraints_idxs,
668                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
669   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
670 
671   maxneigs = 0;
672   cum = cum2 = cumarray = 0;
673   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
674   pcbddc->adaptive_constraints_data_ptr[0] = 0;
675   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
676     const PetscInt *idxs;
677 
678     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
679     for (cum=0;cum<nv;cum++) {
680       pcbddc->adaptive_constraints_n[cum] = 1;
681       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
682       pcbddc->adaptive_constraints_data[cum] = 1.0;
683       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
684       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
685     }
686     cum2 = cum;
687     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
688   }
689 
690   if (mss) { /* multilevel */
691     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
692     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
693   }
694 
695   thresh = pcbddc->adaptive_threshold;
696   for (i=0;i<sub_schurs->n_subs;i++) {
697     const PetscInt *idxs;
698     PetscReal      infty = PETSC_MAX_REAL;
699     PetscInt       j,subset_size,eigs_start = 0;
700     PetscBLASInt   B_N;
701     PetscBool      same_data = PETSC_FALSE;
702 
703     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
704     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
705     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
706     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
707       if (sub_schurs->is_hermitian) {
708         PetscInt j,k;
709         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
710           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
711           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
712         }
713         for (j=0;j<subset_size;j++) {
714           for (k=j;k<subset_size;k++) {
715             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
716             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
717           }
718         }
719       } else {
720         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
721         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
722       }
723     } else {
724       S = Sarray + cumarray;
725       St = Starray + cumarray;
726     }
727 
728     /* see if we can save some work */
729     if (sub_schurs->n_subs == 1) {
730       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
731     }
732 
733     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
734       B_neigs = 0;
735     } else {
736       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
737         PetscBLASInt B_itype = 1;
738         PetscBLASInt B_IL, B_IU;
739         PetscReal    eps = -1.0; /* dlamch? */
740         PetscInt     nmin_s;
741 
742         if (pcbddc->dbg_flag) {
743           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
744         }
745 
746         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
747         if (thresh > 1.+PETSC_SMALL) {
748 
749           /* ask for eigenvalues larger than thresh */
750 #if defined(PETSC_USE_COMPLEX)
751           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
752 #else
753           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
754 #endif
755         } else {
756           B_IU = PetscMax(1,PetscMin(B_N,nmax));
757           B_IL = 1;
758 #if defined(PETSC_USE_COMPLEX)
759           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
760 #else
761           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
762 #endif
763         }
764         ierr = PetscFPTrapPop();CHKERRQ(ierr);
765         if (B_ierr) {
766           if (B_ierr < 0 ) {
767             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
768           } else if (B_ierr <= B_N) {
769             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
770           } else {
771             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
772           }
773         }
774 
775         if (B_neigs > nmax) {
776           if (pcbddc->dbg_flag) {
777             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
778           }
779           eigs_start = B_neigs -nmax;
780           B_neigs = nmax;
781         }
782 
783         nmin_s = PetscMin(nmin,B_N);
784         if (B_neigs < nmin_s) {
785           PetscBLASInt B_neigs2;
786 
787           B_IU = B_N - B_neigs;
788           B_IL = B_N - nmin_s + 1;
789           if (pcbddc->dbg_flag) {
790             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
791           }
792           if (sub_schurs->is_hermitian) {
793             PetscInt j;
794             for (j=0;j<subset_size;j++) {
795               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
796             }
797             for (j=0;j<subset_size;j++) {
798               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
799             }
800           } else {
801             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
802             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
803           }
804           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
805 #if defined(PETSC_USE_COMPLEX)
806           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
807 #else
808           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
809 #endif
810           ierr = PetscFPTrapPop();CHKERRQ(ierr);
811           B_neigs += B_neigs2;
812         }
813         if (B_ierr) {
814           if (B_ierr < 0 ) {
815             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
816           } else if (B_ierr <= B_N) {
817             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
818           } else {
819             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
820           }
821         }
822         if (pcbddc->dbg_flag) {
823           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
824           for (j=0;j<B_neigs;j++) {
825             if (eigs[j] == 0.0) {
826               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
827             } else {
828               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
829             }
830           }
831         }
832       } else {
833           /* TODO */
834       }
835     }
836     maxneigs = PetscMax(B_neigs,maxneigs);
837     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
838     if (B_neigs) {
839       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
840 
841       if (pcbddc->dbg_flag > 1) {
842         PetscInt ii;
843         for (ii=0;ii<B_neigs;ii++) {
844           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
845           for (j=0;j<B_N;j++) {
846 #if defined(PETSC_USE_COMPLEX)
847             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
848             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
849             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
850 #else
851             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
852 #endif
853           }
854         }
855       }
856 #if 0
857       for (j=0;j<B_neigs;j++) {
858         PetscBLASInt Blas_N,Blas_one = 1.0;
859         PetscScalar norm;
860         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
861         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
862                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
863         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
864           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
865         } else {
866           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
867         }
868         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
869       }
870 #endif
871       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
872       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
873       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
874       cum++;
875     }
876     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
877     /* shift for next computation */
878     cumarray += subset_size*subset_size;
879   }
880   if (pcbddc->dbg_flag) {
881     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
882   }
883 
884   if (mss) {
885     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
886     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
887     /* destroy matrices (junk) */
888     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
889     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
890   }
891   if (allocated_S_St) {
892     ierr = PetscFree2(S,St);CHKERRQ(ierr);
893   }
894   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
895 #if defined(PETSC_USE_COMPLEX)
896   ierr = PetscFree(rwork);CHKERRQ(ierr);
897 #endif
898   if (pcbddc->dbg_flag) {
899     PetscInt maxneigs_r;
900     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
901     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
902   }
903   PetscFunctionReturn(0);
904 }
905 
906 #undef __FUNCT__
907 #define __FUNCT__ "PCBDDCSetUpSolvers"
908 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
909 {
910   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
911   PetscScalar    *coarse_submat_vals;
912   PetscErrorCode ierr;
913 
914   PetscFunctionBegin;
915   /* Setup local scatters R_to_B and (optionally) R_to_D */
916   /* PCBDDCSetUpLocalWorkVectors should be called first! */
917   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
918 
919   /* Setup local neumann solver ksp_R */
920   /* PCBDDCSetUpLocalScatters should be called first! */
921   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
922 
923   /* Change global null space passed in by the user if change of basis has been requested */
924   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
925     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
926   }
927 
928   /*
929      Setup local correction and local part of coarse basis.
930      Gives back the dense local part of the coarse matrix in column major ordering
931   */
932   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
933 
934   /* Compute total number of coarse nodes and setup coarse solver */
935   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
936 
937   /* free */
938   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
939   PetscFunctionReturn(0);
940 }
941 
942 #undef __FUNCT__
943 #define __FUNCT__ "PCBDDCResetCustomization"
944 PetscErrorCode PCBDDCResetCustomization(PC pc)
945 {
946   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
947   PetscErrorCode ierr;
948 
949   PetscFunctionBegin;
950   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
951   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
952   ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
953   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
954   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
955   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
956   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
957   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
958   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
959   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
960   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
961   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "PCBDDCResetTopography"
967 PetscErrorCode PCBDDCResetTopography(PC pc)
968 {
969   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
970   PetscInt       i;
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
975   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
976   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
977   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
978   for (i=0;i<pcbddc->n_local_subs;i++) {
979     ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr);
980   }
981   ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr);
982   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "PCBDDCResetSolvers"
988 PetscErrorCode PCBDDCResetSolvers(PC pc)
989 {
990   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
991   PetscErrorCode ierr;
992 
993   PetscFunctionBegin;
994   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
995   if (pcbddc->coarse_phi_B) {
996     PetscScalar *array;
997     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
998     ierr = PetscFree(array);CHKERRQ(ierr);
999   }
1000   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1001   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1002   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1003   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1004   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1005   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1006   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1007   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1008   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1009   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1010   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1011   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1012   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1013   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
1014   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
1015   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
1016   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
1017   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1018   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1019   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1020   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
1021   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
1022   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
1023   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
1024   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
1025   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
1026   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
1027   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
1028   if (pcbddc->benign_zerodiag_subs) {
1029     PetscInt i;
1030     for (i=0;i<pcbddc->benign_n;i++) {
1031       ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr);
1032     }
1033     ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr);
1034   }
1035   ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
1041 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
1042 {
1043   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1044   PC_IS          *pcis = (PC_IS*)pc->data;
1045   VecType        impVecType;
1046   PetscInt       n_constraints,n_R,old_size;
1047   PetscErrorCode ierr;
1048 
1049   PetscFunctionBegin;
1050   if (!pcbddc->ConstraintMatrix) {
1051     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
1052   }
1053   /* get sizes */
1054   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
1055   n_R = pcis->n - pcbddc->n_vertices;
1056   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
1057   /* local work vectors (try to avoid unneeded work)*/
1058   /* R nodes */
1059   old_size = -1;
1060   if (pcbddc->vec1_R) {
1061     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
1062   }
1063   if (n_R != old_size) {
1064     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1065     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1066     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
1067     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
1068     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
1069     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
1070   }
1071   /* local primal dofs */
1072   old_size = -1;
1073   if (pcbddc->vec1_P) {
1074     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
1075   }
1076   if (pcbddc->local_primal_size != old_size) {
1077     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1078     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
1079     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
1080     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
1081   }
1082   /* local explicit constraints */
1083   old_size = -1;
1084   if (pcbddc->vec1_C) {
1085     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
1086   }
1087   if (n_constraints && n_constraints != old_size) {
1088     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1089     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
1090     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
1091     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "PCBDDCSetUpCorrection"
1098 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
1099 {
1100   PetscErrorCode  ierr;
1101   /* pointers to pcis and pcbddc */
1102   PC_IS*          pcis = (PC_IS*)pc->data;
1103   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1104   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1105   /* submatrices of local problem */
1106   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
1107   /* submatrices of benign trick */
1108   Mat             B0_V = NULL;
1109   /* submatrices of local coarse problem */
1110   Mat             S_VV,S_CV,S_VC,S_CC;
1111   /* working matrices */
1112   Mat             C_CR;
1113   /* additional working stuff */
1114   PC              pc_R;
1115   Mat             F;
1116   PetscBool       isLU,isCHOL,isILU;
1117 
1118   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
1119   PetscScalar     *work;
1120   PetscInt        *idx_V_B;
1121   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
1122   PetscInt        i,n_R,n_D,n_B;
1123 
1124   /* some shortcuts to scalars */
1125   PetscScalar     one=1.0,m_one=-1.0;
1126 
1127   PetscFunctionBegin;
1128   if (!pcbddc->symmetric_primal && pcbddc->benign_n) {
1129     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
1130   }
1131 
1132   /* Set Non-overlapping dimensions */
1133   n_vertices = pcbddc->n_vertices;
1134   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
1135   n_B = pcis->n_B;
1136   n_D = pcis->n - n_B;
1137   n_R = pcis->n - n_vertices;
1138 
1139   /* vertices in boundary numbering */
1140   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
1141   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
1142   if (i != n_vertices) {
1143     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
1144   }
1145 
1146   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
1147   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
1148   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
1149   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
1150   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
1151   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
1152   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
1153   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
1154   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
1155   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
1156 
1157   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
1158   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
1159   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
1160   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
1161   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
1162   lda_rhs = n_R;
1163   if (isLU || isILU || isCHOL) {
1164     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
1165   } else if (sub_schurs->reuse_mumps) {
1166     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1167     MatFactorType type;
1168 
1169     F = reuse_mumps->F;
1170     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
1171     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
1172     ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr);
1173   } else {
1174     F = NULL;
1175   }
1176 
1177   /* allocate workspace */
1178   n = 0;
1179   if (n_constraints) {
1180     n += lda_rhs*n_constraints;
1181   }
1182   if (n_vertices) {
1183     n = PetscMax(2*lda_rhs*n_vertices,n);
1184     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
1185   }
1186   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1187 
1188   /* Precompute stuffs needed for preprocessing and application of BDDC*/
1189   if (n_constraints) {
1190     Mat         M1,M2,M3;
1191     Mat         auxmat;
1192     IS          is_aux;
1193     PetscScalar *array,*array2;
1194 
1195     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1196     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1197 
1198     /* Extract constraints on R nodes: C_{CR}  */
1199     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
1200     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
1201     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
1202 
1203     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
1204     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
1205     ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1206     for (i=0;i<n_constraints;i++) {
1207       const PetscScalar *row_cmat_values;
1208       const PetscInt    *row_cmat_indices;
1209       PetscInt          size_of_constraint,j;
1210 
1211       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1212       for (j=0;j<size_of_constraint;j++) {
1213         work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
1214       }
1215       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1216     }
1217     ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
1218     if (F) {
1219       Mat B;
1220 
1221       ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr);
1222       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
1223       ierr = MatDestroy(&B);CHKERRQ(ierr);
1224     } else {
1225       PetscScalar *marr;
1226 
1227       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1228       for (i=0;i<n_constraints;i++) {
1229         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr);
1230         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr);
1231         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1232         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1233         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1234       }
1235       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1236     }
1237     if (!pcbddc->switch_static) {
1238       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1239       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1240       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1241       for (i=0;i<n_constraints;i++) {
1242         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr);
1243         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
1244         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1247         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1248       }
1249       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1250       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1251       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1252     } else {
1253       if (lda_rhs != n_R) {
1254         IS dummy;
1255 
1256         ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr);
1257         ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1258         ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1259       } else {
1260         ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
1261         pcbddc->local_auxmat2 = local_auxmat2_R;
1262       }
1263       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1264     }
1265     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1266     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
1267     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
1268     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
1269     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
1270     if (isCHOL) {
1271       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
1272     } else {
1273       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
1274     }
1275     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
1276     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
1277     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
1278     ierr = MatDestroy(&M2);CHKERRQ(ierr);
1279     ierr = MatDestroy(&M3);CHKERRQ(ierr);
1280     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
1281     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
1282     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
1283     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
1284     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1285   }
1286   /* Get submatrices from subdomain matrix */
1287   if (pcbddc->benign_n) {
1288     IS        dummy;
1289     Mat       B0_R;
1290     PetscReal norm;
1291 
1292     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1293     ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
1294     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
1295     if (norm > PETSC_SMALL) {
1296       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
1297     }
1298     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1299     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1300   }
1301 
1302   if (n_vertices) {
1303     IS is_aux;
1304 
1305     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
1306       IS tis;
1307 
1308       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
1309       ierr = ISSort(tis);CHKERRQ(ierr);
1310       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
1311       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1312     } else {
1313       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
1314     }
1315     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
1316     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
1317     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
1318     if (pcbddc->benign_n) {
1319       IS dummy;
1320 
1321       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1322       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1323       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1324     }
1325     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1326   }
1327 
1328   /* Matrix of coarse basis functions (local) */
1329   if (pcbddc->coarse_phi_B) {
1330     PetscInt on_B,on_primal,on_D=n_D;
1331     if (pcbddc->coarse_phi_D) {
1332       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1333     }
1334     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1335     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1336       PetscScalar *marray;
1337 
1338       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1339       ierr = PetscFree(marray);CHKERRQ(ierr);
1340       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1341       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1342       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1343       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1344     }
1345   }
1346 
1347   if (!pcbddc->coarse_phi_B) {
1348     PetscScalar *marray;
1349 
1350     n = n_B*pcbddc->local_primal_size;
1351     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1352       n += n_D*pcbddc->local_primal_size;
1353     }
1354     if (!pcbddc->symmetric_primal) {
1355       n *= 2;
1356     }
1357     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1358     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1359     n = n_B*pcbddc->local_primal_size;
1360     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1361       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1362       n += n_D*pcbddc->local_primal_size;
1363     }
1364     if (!pcbddc->symmetric_primal) {
1365       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1366       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1367         n = n_B*pcbddc->local_primal_size;
1368         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1369       }
1370     } else {
1371       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1372       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1373       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1374         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1375         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1376       }
1377     }
1378   }
1379 
1380   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1381   p0_lidx_I = NULL;
1382   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1383     const PetscInt *idxs;
1384 
1385     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1386     ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr);
1387     for (i=0;i<pcbddc->benign_n;i++) {
1388       ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr);
1389     }
1390     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1391   }
1392 
1393   /* vertices */
1394   if (n_vertices) {
1395 
1396     ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1397 
1398     if (n_R) {
1399       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1400       PetscBLASInt B_N,B_one = 1;
1401       PetscScalar  *x,*y;
1402       PetscBool    isseqaij;
1403 
1404       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1405       if (lda_rhs == n_R) {
1406         ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1407       } else {
1408         PetscScalar    *av,*array;
1409         const PetscInt *xadj,*adjncy;
1410         PetscInt       n;
1411         PetscBool      flg_row;
1412 
1413         array = work+lda_rhs*n_vertices;
1414         ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1415         ierr = MatConvert(A_RV,MATSEQAIJ,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1416         ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1417         ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr);
1418         for (i=0;i<n;i++) {
1419           PetscInt j;
1420           for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
1421         }
1422         ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1423         ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1424         ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr);
1425       }
1426       ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1427       if (F) { /* TODO could be optimized for symmetric problems */
1428         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1429       } else {
1430         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1431         for (i=0;i<n_vertices;i++) {
1432           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr);
1433           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr);
1434           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1435           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1436           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1437         }
1438         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1439       }
1440       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1441       /* S_VV and S_CV */
1442       if (n_constraints) {
1443         Mat B;
1444 
1445         ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1446         for (i=0;i<n_vertices;i++) {
1447           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr);
1448           ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr);
1449           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1450           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1451           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1452           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1453         }
1454         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr);
1455         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1456         ierr = MatDestroy(&B);CHKERRQ(ierr);
1457         ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr);
1458         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1459         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1460         ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr);
1461         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
1462         ierr = MatDestroy(&B);CHKERRQ(ierr);
1463       }
1464       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1465       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1466         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1467       }
1468       if (lda_rhs != n_R) {
1469         ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1470         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1471         ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr);
1472       }
1473       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1474       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1475       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1476       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1477       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1478       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1479       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1480       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1481       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1482       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1483     } else {
1484       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1485     }
1486     for (i=0;i<pcbddc->benign_n;i++) {
1487       const PetscScalar *vals;
1488       const PetscInt    *idxs;
1489       PetscInt          n,j,primal_idx;
1490 
1491       ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1492       primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i;
1493       for (j=0;j<n;j++) {
1494         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j];
1495         coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j];
1496       }
1497       ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1498     }
1499     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1500 
1501     /* coarse basis functions */
1502     for (i=0;i<n_vertices;i++) {
1503       PetscScalar *y;
1504 
1505       ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr);
1506       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1507       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1508       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510       y[n_B*i+idx_V_B[i]] = 1.0;
1511       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1512       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1513 
1514       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1515         PetscInt j;
1516 
1517         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1518         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1519         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1522         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1523         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1524       }
1525       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1526     }
1527     /* if n_R == 0 the object is not destroyed */
1528     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1529   }
1530 
1531   if (n_constraints) {
1532     Mat B;
1533 
1534     ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr);
1535     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1536     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1537     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1538     if (n_vertices) {
1539       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1540         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1541       } else {
1542         Mat S_VCt;
1543 
1544         if (lda_rhs != n_R) {
1545           ierr = MatDestroy(&B);CHKERRQ(ierr);
1546           ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr);
1547           ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr);
1548         }
1549         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1550         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1551         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1552       }
1553     }
1554     ierr = MatDestroy(&B);CHKERRQ(ierr);
1555     /* coarse basis functions */
1556     for (i=0;i<n_constraints;i++) {
1557       PetscScalar *y;
1558 
1559       ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr);
1560       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1561       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1562       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1564       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1565       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1566       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1567         PetscInt j;
1568 
1569         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1570         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1571         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1574         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1575         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1576       }
1577       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1578     }
1579   }
1580   if (n_constraints) {
1581     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1582   }
1583   ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr);
1584   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1585 
1586   /* compute other basis functions for non-symmetric problems */
1587   if (!pcbddc->symmetric_primal) {
1588     Mat         B_V=NULL,B_C=NULL;
1589     PetscScalar *marray;
1590 
1591     if (n_constraints) {
1592       Mat S_CCT,C_CRT;
1593 
1594       ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr);
1595       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1596       ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1597       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1598       if (n_vertices) {
1599         Mat S_VCT;
1600 
1601         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1602         ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1603         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1604       }
1605       ierr = MatDestroy(&C_CRT);CHKERRQ(ierr);
1606     }
1607     if (n_vertices && n_R) {
1608       PetscScalar    *av,*marray;
1609       const PetscInt *xadj,*adjncy;
1610       PetscInt       n;
1611       PetscBool      flg_row;
1612 
1613       /* B_V = B_V - A_VR^T */
1614       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1615       ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1616       ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr);
1617       ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr);
1618       for (i=0;i<n;i++) {
1619         PetscInt j;
1620         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
1621       }
1622       ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr);
1623       ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1624       ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1625     }
1626 
1627     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1628     ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr);
1629     for (i=0;i<n_vertices;i++) {
1630       ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr);
1631       ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1632       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1633       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1634       ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1635     }
1636     ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr);
1637     ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr);
1638     for (i=n_vertices;i<n_constraints+n_vertices;i++) {
1639       ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr);
1640       ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1641       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1642       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1643       ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1644     }
1645     ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr);
1646     /* coarse basis functions */
1647     for (i=0;i<pcbddc->local_primal_size;i++) {
1648       PetscScalar *y;
1649 
1650       ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1651       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1652       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1653       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1654       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1655       if (i<n_vertices) {
1656         y[n_B*i+idx_V_B[i]] = 1.0;
1657       }
1658       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1659       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1660 
1661       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1662         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1663         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1664         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1665         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1667         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1668       }
1669       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1670     }
1671     ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1672     ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1673   }
1674   /* free memory */
1675   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1676   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1677   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1678   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1679   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1680   ierr = PetscFree(work);CHKERRQ(ierr);
1681   if (n_vertices) {
1682     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1683   }
1684   if (n_constraints) {
1685     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1686   }
1687   /* Checking coarse_sub_mat and coarse basis functios */
1688   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1689   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1690   if (pcbddc->dbg_flag) {
1691     Mat         coarse_sub_mat;
1692     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1693     Mat         coarse_phi_D,coarse_phi_B;
1694     Mat         coarse_psi_D,coarse_psi_B;
1695     Mat         A_II,A_BB,A_IB,A_BI;
1696     Mat         C_B,CPHI;
1697     IS          is_dummy;
1698     Vec         mones;
1699     MatType     checkmattype=MATSEQAIJ;
1700     PetscReal   real_value;
1701 
1702     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1703     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1704     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1705     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1706     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1707     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1708     if (!pcbddc->symmetric_primal) {
1709       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1710       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1711     }
1712     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1713 
1714     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1715     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1716     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1717     if (!pcbddc->symmetric_primal) {
1718       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1719       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1720       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1721       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1722       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1723       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1724       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1725       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1726       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1727       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1728       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1729       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1730     } else {
1731       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1732       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1733       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1734       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1735       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1736       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1737       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1738       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1739     }
1740     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1741     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1742     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1743     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1744     if (pcbddc->benign_n) {
1745       Mat         B0_I,B0_B,B0_BPHI,B0_IPHI;
1746       PetscScalar *data,*data2;
1747       PetscInt    j;
1748 
1749       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
1750       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1751       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1752       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1753       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1754       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1755       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1756       for (j=0;j<pcbddc->benign_n;j++) {
1757         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
1758         for (i=0;i<pcbddc->local_primal_size;i++) {
1759           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
1760           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
1761         }
1762       }
1763       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1764       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1765       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1766       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1767       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1768       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1769       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1770       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1771       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1772     }
1773 #if 0
1774   {
1775     PetscViewer viewer;
1776     char filename[256];
1777     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
1778     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1779     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1780     ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr);
1781     ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr);
1782     ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr);
1783     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1784     ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr);
1785     ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr);
1786     if (pcbddc->coarse_phi_D) {
1787       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr);
1788       ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr);
1789     }
1790     if (pcbddc->coarse_psi_B) {
1791       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr);
1792       ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr);
1793     }
1794     if (pcbddc->coarse_phi_B) {
1795       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr);
1796       ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr);
1797     }
1798     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1799   }
1800 #endif
1801     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1802     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1803     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
1804     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1805 
1806     /* check constraints */
1807     if (!pcbddc->benign_n) { /* TODO: add benign case */
1808       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1809       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1810       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1811       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1812       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1813       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1814       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1815       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1816       if (!pcbddc->symmetric_primal) {
1817         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1818         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1819         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1820         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1821         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1822       }
1823       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1824       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1825       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1826       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1827     }
1828     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1829     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1830     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1831     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1832     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1833     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1834     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1835     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1836     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1837     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1838     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1839     if (!pcbddc->symmetric_primal) {
1840       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1841       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1842     }
1843     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1844   }
1845   /* get back data */
1846   *coarse_submat_vals_n = coarse_submat_vals;
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1852 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1853 {
1854   Mat            *work_mat;
1855   IS             isrow_s,iscol_s;
1856   PetscBool      rsorted,csorted;
1857   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1858   PetscErrorCode ierr;
1859 
1860   PetscFunctionBegin;
1861   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1862   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1863   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1864   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1865 
1866   if (!rsorted) {
1867     const PetscInt *idxs;
1868     PetscInt *idxs_sorted,i;
1869 
1870     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1871     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1872     for (i=0;i<rsize;i++) {
1873       idxs_perm_r[i] = i;
1874     }
1875     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1876     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1877     for (i=0;i<rsize;i++) {
1878       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1879     }
1880     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1881     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1882   } else {
1883     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1884     isrow_s = isrow;
1885   }
1886 
1887   if (!csorted) {
1888     if (isrow == iscol) {
1889       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1890       iscol_s = isrow_s;
1891     } else {
1892       const PetscInt *idxs;
1893       PetscInt *idxs_sorted,i;
1894 
1895       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1896       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1897       for (i=0;i<csize;i++) {
1898         idxs_perm_c[i] = i;
1899       }
1900       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1901       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1902       for (i=0;i<csize;i++) {
1903         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1904       }
1905       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1906       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1907     }
1908   } else {
1909     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1910     iscol_s = iscol;
1911   }
1912 
1913   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1914 
1915   if (!rsorted || !csorted) {
1916     Mat      new_mat;
1917     IS       is_perm_r,is_perm_c;
1918 
1919     if (!rsorted) {
1920       PetscInt *idxs_r,i;
1921       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1922       for (i=0;i<rsize;i++) {
1923         idxs_r[idxs_perm_r[i]] = i;
1924       }
1925       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1926       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1927     } else {
1928       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1929     }
1930     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1931 
1932     if (!csorted) {
1933       if (isrow_s == iscol_s) {
1934         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1935         is_perm_c = is_perm_r;
1936       } else {
1937         PetscInt *idxs_c,i;
1938         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1939         for (i=0;i<csize;i++) {
1940           idxs_c[idxs_perm_c[i]] = i;
1941         }
1942         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1943         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1944       }
1945     } else {
1946       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1947     }
1948     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1949 
1950     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1951     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1952     work_mat[0] = new_mat;
1953     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1954     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1955   }
1956 
1957   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1958   *B = work_mat[0];
1959   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1960   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1961   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #undef __FUNCT__
1966 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1967 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1968 {
1969   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1970   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1971   Mat            new_mat;
1972   IS             is_local,is_global;
1973   PetscInt       local_size;
1974   PetscBool      isseqaij;
1975   PetscErrorCode ierr;
1976 
1977   PetscFunctionBegin;
1978   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1979   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1980   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1981   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1982   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1983   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1984   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1985 
1986   /* check */
1987   if (pcbddc->dbg_flag) {
1988     Vec       x,x_change;
1989     PetscReal error;
1990 
1991     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1992     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1993     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1994     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1995     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1996     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1997     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1998     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1999     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2000     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2001     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2002     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
2003     ierr = VecDestroy(&x);CHKERRQ(ierr);
2004     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2005   }
2006 
2007   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
2008   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
2009   if (isseqaij) {
2010     Mat M;
2011 
2012     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
2013     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
2014     ierr = MatDestroy(&M);CHKERRQ(ierr);
2015   } else {
2016     Mat work_mat,M;
2017 
2018     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
2019     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
2020     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
2021     ierr = MatDestroy(&M);CHKERRQ(ierr);
2022     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
2023   }
2024   if (matis->A->symmetric_set) {
2025     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
2026 #if !defined(PETSC_USE_COMPLEX)
2027     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
2028 #endif
2029   }
2030   /*
2031   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2032   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
2033   */
2034   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
2040 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
2041 {
2042   PC_IS*          pcis = (PC_IS*)(pc->data);
2043   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
2044   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2045   PetscInt        *idx_R_local=NULL;
2046   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
2047   PetscInt        vbs,bs;
2048   PetscBT         bitmask=NULL;
2049   PetscErrorCode  ierr;
2050 
2051   PetscFunctionBegin;
2052   /*
2053     No need to setup local scatters if
2054       - primal space is unchanged
2055         AND
2056       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
2057         AND
2058       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
2059   */
2060   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
2061     PetscFunctionReturn(0);
2062   }
2063   /* destroy old objects */
2064   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
2065   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
2066   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
2067   /* Set Non-overlapping dimensions */
2068   n_B = pcis->n_B;
2069   n_D = pcis->n - n_B;
2070   n_vertices = pcbddc->n_vertices;
2071 
2072   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
2073 
2074   /* create auxiliary bitmask and allocate workspace */
2075   if (!sub_schurs->reuse_mumps) {
2076     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
2077     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
2078     for (i=0;i<n_vertices;i++) {
2079       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
2080     }
2081 
2082     for (i=0, n_R=0; i<pcis->n; i++) {
2083       if (!PetscBTLookup(bitmask,i)) {
2084         idx_R_local[n_R++] = i;
2085       }
2086     }
2087   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
2088     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2089 
2090     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2091     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
2092   }
2093 
2094   /* Block code */
2095   vbs = 1;
2096   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
2097   if (bs>1 && !(n_vertices%bs)) {
2098     PetscBool is_blocked = PETSC_TRUE;
2099     PetscInt  *vary;
2100     if (!sub_schurs->reuse_mumps) {
2101       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
2102       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
2103       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
2104       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
2105       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
2106       for (i=0; i<pcis->n/bs; i++) {
2107         if (vary[i]!=0 && vary[i]!=bs) {
2108           is_blocked = PETSC_FALSE;
2109           break;
2110         }
2111       }
2112       ierr = PetscFree(vary);CHKERRQ(ierr);
2113     } else {
2114       /* Verify directly the R set */
2115       for (i=0; i<n_R/bs; i++) {
2116         PetscInt j,node=idx_R_local[bs*i];
2117         for (j=1; j<bs; j++) {
2118           if (node != idx_R_local[bs*i+j]-j) {
2119             is_blocked = PETSC_FALSE;
2120             break;
2121           }
2122         }
2123       }
2124     }
2125     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
2126       vbs = bs;
2127       for (i=0;i<n_R/vbs;i++) {
2128         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
2129       }
2130     }
2131   }
2132   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
2133   if (sub_schurs->reuse_mumps) {
2134     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2135 
2136     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2137     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
2138     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
2139     reuse_mumps->is_R = pcbddc->is_R_local;
2140   } else {
2141     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
2142   }
2143 
2144   /* print some info if requested */
2145   if (pcbddc->dbg_flag) {
2146     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2147     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2148     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2149     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
2150     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
2151     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr);
2152     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2153   }
2154 
2155   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
2156   if (!sub_schurs->reuse_mumps) {
2157     IS       is_aux1,is_aux2;
2158     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
2159 
2160     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2161     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
2162     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
2163     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2164     for (i=0; i<n_D; i++) {
2165       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
2166     }
2167     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2168     for (i=0, j=0; i<n_R; i++) {
2169       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
2170         aux_array1[j++] = i;
2171       }
2172     }
2173     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2174     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2175     for (i=0, j=0; i<n_B; i++) {
2176       if (!PetscBTLookup(bitmask,is_indices[i])) {
2177         aux_array2[j++] = i;
2178       }
2179     }
2180     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2181     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
2182     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
2183     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2184     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
2185 
2186     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2187       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
2188       for (i=0, j=0; i<n_R; i++) {
2189         if (PetscBTLookup(bitmask,idx_R_local[i])) {
2190           aux_array1[j++] = i;
2191         }
2192       }
2193       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2194       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2195       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2196     }
2197     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
2198     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2199   } else {
2200     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2201     IS               tis;
2202     PetscInt         schur_size;
2203 
2204     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
2205     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
2206     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
2207     ierr = ISDestroy(&tis);CHKERRQ(ierr);
2208     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2209       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
2210       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2211       ierr = ISDestroy(&tis);CHKERRQ(ierr);
2212     }
2213   }
2214   PetscFunctionReturn(0);
2215 }
2216 
2217 
2218 #undef __FUNCT__
2219 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
2220 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
2221 {
2222   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2223   PC_IS          *pcis = (PC_IS*)pc->data;
2224   PC             pc_temp;
2225   Mat            A_RR;
2226   MatReuse       reuse;
2227   PetscScalar    m_one = -1.0;
2228   PetscReal      value;
2229   PetscInt       n_D,n_R;
2230   PetscBool      use_exact,use_exact_reduced,issbaij;
2231   PetscErrorCode ierr;
2232   /* prefixes stuff */
2233   char           dir_prefix[256],neu_prefix[256],str_level[16];
2234   size_t         len;
2235 
2236   PetscFunctionBegin;
2237 
2238   /* compute prefixes */
2239   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
2240   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
2241   if (!pcbddc->current_level) {
2242     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2243     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2244     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2245     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2246   } else {
2247     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2248     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
2249     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2250     len -= 15; /* remove "pc_bddc_coarse_" */
2251     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
2252     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
2253     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2254     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2255     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2256     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2257     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
2258     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
2259   }
2260 
2261   /* DIRICHLET PROBLEM */
2262   if (dirichlet) {
2263     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2264     if (pcbddc->local_mat->symmetric_set) {
2265       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2266     }
2267     /* Matrix for Dirichlet problem is pcis->A_II */
2268     n_D = pcis->n - pcis->n_B;
2269     if (!pcbddc->ksp_D) { /* create object if not yet build */
2270       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
2271       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
2272       /* default */
2273       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
2274       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
2275       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2276       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2277       if (issbaij) {
2278         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2279       } else {
2280         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2281       }
2282       /* Allow user's customization */
2283       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
2284       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2285     }
2286     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
2287     if (sub_schurs->reuse_mumps) {
2288       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2289 
2290       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
2291     }
2292     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2293     if (!n_D) {
2294       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2295       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2296     }
2297     /* Set Up KSP for Dirichlet problem of BDDC */
2298     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
2299     /* set ksp_D into pcis data */
2300     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
2301     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
2302     pcis->ksp_D = pcbddc->ksp_D;
2303   }
2304 
2305   /* NEUMANN PROBLEM */
2306   A_RR = 0;
2307   if (neumann) {
2308     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2309     PetscInt        ibs,mbs;
2310     PetscBool       issbaij;
2311     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
2312     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
2313     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
2314     if (pcbddc->ksp_R) { /* already created ksp */
2315       PetscInt nn_R;
2316       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
2317       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2318       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
2319       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
2320         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
2321         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2322         reuse = MAT_INITIAL_MATRIX;
2323       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
2324         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
2325           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2326           reuse = MAT_INITIAL_MATRIX;
2327         } else { /* safe to reuse the matrix */
2328           reuse = MAT_REUSE_MATRIX;
2329         }
2330       }
2331       /* last check */
2332       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
2333         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2334         reuse = MAT_INITIAL_MATRIX;
2335       }
2336     } else { /* first time, so we need to create the matrix */
2337       reuse = MAT_INITIAL_MATRIX;
2338     }
2339     /* extract A_RR */
2340     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
2341     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
2342     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2343     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
2344       if (matis->A == pcbddc->local_mat) {
2345         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2346         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2347       } else {
2348         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2349       }
2350     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
2351       if (matis->A == pcbddc->local_mat) {
2352         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2353         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2354       } else {
2355         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2356       }
2357     }
2358     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
2359     if (pcbddc->local_mat->symmetric_set) {
2360       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2361     }
2362     if (!pcbddc->ksp_R) { /* create object if not present */
2363       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
2364       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
2365       /* default */
2366       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
2367       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
2368       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2369       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2370       if (issbaij) {
2371         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2372       } else {
2373         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2374       }
2375       /* Allow user's customization */
2376       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
2377       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2378     }
2379     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2380     if (!n_R) {
2381       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2382       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2383     }
2384     /* Reuse MUMPS solver if it is present */
2385     if (sub_schurs->reuse_mumps) {
2386       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2387 
2388       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2389     }
2390     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2391     /* Set Up KSP for Neumann problem of BDDC */
2392     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2393   }
2394   /* free Neumann problem's matrix */
2395   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2396 
2397   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2398   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2399     if (pcbddc->dbg_flag) {
2400       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2401       ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2402       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2403     }
2404     if (dirichlet) { /* Dirichlet */
2405       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2406       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2407       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2408       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2409       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2410       /* need to be adapted? */
2411       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2412       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2413       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2414       /* print info */
2415       if (pcbddc->dbg_flag) {
2416         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2417         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2418       }
2419       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2420         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2421       }
2422     }
2423     if (neumann) { /* Neumann */
2424       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2425       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2426       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2427       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2428       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2429       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2430       /* need to be adapted? */
2431       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2432       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2433       /* print info */
2434       if (pcbddc->dbg_flag) {
2435         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2436         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2437       }
2438       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2439         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2440       }
2441     }
2442   }
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2448 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2449 {
2450   PetscErrorCode  ierr;
2451   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2452   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2453 
2454   PetscFunctionBegin;
2455   if (!sub_schurs->reuse_mumps) {
2456     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2457   }
2458   if (!pcbddc->switch_static) {
2459     if (applytranspose && pcbddc->local_auxmat1) {
2460       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2461       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2462     }
2463     if (!sub_schurs->reuse_mumps) {
2464       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2465       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2466     } else {
2467       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2468 
2469       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2470       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2471     }
2472   } else {
2473     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2474     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2475     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2476     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2477     if (applytranspose && pcbddc->local_auxmat1) {
2478       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2479       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2480       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2481       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2482     }
2483   }
2484   if (!sub_schurs->reuse_mumps) {
2485     if (applytranspose) {
2486       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2487     } else {
2488       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2489     }
2490   } else {
2491     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2492 
2493     if (applytranspose) {
2494       ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2495     } else {
2496       ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2497     }
2498   }
2499   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2500   if (!pcbddc->switch_static) {
2501     if (!sub_schurs->reuse_mumps) {
2502       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2503       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2504     } else {
2505       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2506 
2507       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2508       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2509     }
2510     if (!applytranspose && pcbddc->local_auxmat1) {
2511       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2512       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2513     }
2514   } else {
2515     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2516     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2517     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2518     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2519     if (!applytranspose && pcbddc->local_auxmat1) {
2520       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2521       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2522     }
2523     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2524     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2525     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2526     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2527   }
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2532 #undef __FUNCT__
2533 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2534 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2535 {
2536   PetscErrorCode ierr;
2537   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2538   PC_IS*            pcis = (PC_IS*)  (pc->data);
2539   const PetscScalar zero = 0.0;
2540 
2541   PetscFunctionBegin;
2542   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2543   if (applytranspose) {
2544     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2545     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2546   } else {
2547     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2548     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2549   }
2550 
2551   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2552   if (pcbddc->benign_n) {
2553     PetscScalar *array;
2554     PetscInt    j;
2555 
2556     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2557     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
2558     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2559   }
2560 
2561   /* start communications from local primal nodes to rhs of coarse solver */
2562   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2563   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2564   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2565 
2566   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2567   /* TODO remove null space when doing multilevel */
2568   if (pcbddc->coarse_ksp) {
2569     Mat coarse_mat;
2570     Vec rhs,sol;
2571     MatNullSpace nullsp;
2572 
2573     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2574     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2575     ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
2576     ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr);
2577     if (nullsp) {
2578       ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr);
2579     }
2580     if (applytranspose) {
2581       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2582     } else {
2583       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2584     }
2585     if (nullsp) {
2586       ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr);
2587     }
2588   }
2589 
2590   /* Local solution on R nodes */
2591   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2592     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2593   }
2594 
2595   /* communications from coarse sol to local primal nodes */
2596   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2597   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2598 
2599   /* Sum contributions from two levels */
2600   if (applytranspose) {
2601     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2602     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2603   } else {
2604     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2605     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2606   }
2607   /* store p0 */
2608   if (pcbddc->benign_n) {
2609     PetscScalar *array;
2610     PetscInt    j;
2611 
2612     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2613     for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
2614     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2615   }
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 #undef __FUNCT__
2620 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2621 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2622 {
2623   PetscErrorCode ierr;
2624   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2625   PetscScalar    *array;
2626   Vec            from,to;
2627 
2628   PetscFunctionBegin;
2629   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2630     from = pcbddc->coarse_vec;
2631     to = pcbddc->vec1_P;
2632     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2633       Vec tvec;
2634 
2635       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2636       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2637       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2638       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2639       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2640       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2641     }
2642   } else { /* from local to global -> put data in coarse right hand side */
2643     from = pcbddc->vec1_P;
2644     to = pcbddc->coarse_vec;
2645   }
2646   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 #undef __FUNCT__
2651 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2652 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2653 {
2654   PetscErrorCode ierr;
2655   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2656   PetscScalar    *array;
2657   Vec            from,to;
2658 
2659   PetscFunctionBegin;
2660   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2661     from = pcbddc->coarse_vec;
2662     to = pcbddc->vec1_P;
2663   } else { /* from local to global -> put data in coarse right hand side */
2664     from = pcbddc->vec1_P;
2665     to = pcbddc->coarse_vec;
2666   }
2667   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2668   if (smode == SCATTER_FORWARD) {
2669     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2670       Vec tvec;
2671 
2672       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2673       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2674       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2675       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2676     }
2677   } else {
2678     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2679      ierr = VecResetArray(from);CHKERRQ(ierr);
2680     }
2681   }
2682   PetscFunctionReturn(0);
2683 }
2684 
2685 /* uncomment for testing purposes */
2686 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2687 #undef __FUNCT__
2688 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2689 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2690 {
2691   PetscErrorCode    ierr;
2692   PC_IS*            pcis = (PC_IS*)(pc->data);
2693   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2694   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2695   /* one and zero */
2696   PetscScalar       one=1.0,zero=0.0;
2697   /* space to store constraints and their local indices */
2698   PetscScalar       *constraints_data;
2699   PetscInt          *constraints_idxs,*constraints_idxs_B;
2700   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2701   PetscInt          *constraints_n;
2702   /* iterators */
2703   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2704   /* BLAS integers */
2705   PetscBLASInt      lwork,lierr;
2706   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2707   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2708   /* reuse */
2709   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2710   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2711   /* change of basis */
2712   PetscBool         qr_needed;
2713   PetscBT           change_basis,qr_needed_idx;
2714   /* auxiliary stuff */
2715   PetscInt          *nnz,*is_indices;
2716   PetscInt          ncc;
2717   /* some quantities */
2718   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2719   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2720 
2721   PetscFunctionBegin;
2722   /* Destroy Mat objects computed previously */
2723   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2724   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2725   /* save info on constraints from previous setup (if any) */
2726   olocal_primal_size = pcbddc->local_primal_size;
2727   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2728   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2729   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2730   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2731   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2732   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2733 
2734   /* print some info */
2735   if (pcbddc->dbg_flag) {
2736     IS       vertices;
2737     PetscInt nv,nedges,nfaces;
2738     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2739     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2740     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2741     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2742     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2743     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2744     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2745     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2746     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2747     ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2748   }
2749 
2750   if (!pcbddc->adaptive_selection) {
2751     IS           ISForVertices,*ISForFaces,*ISForEdges;
2752     MatNullSpace nearnullsp;
2753     const Vec    *nearnullvecs;
2754     Vec          *localnearnullsp;
2755     PetscScalar  *array;
2756     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2757     PetscBool    nnsp_has_cnst;
2758     /* LAPACK working arrays for SVD or POD */
2759     PetscBool    skip_lapack,boolforchange;
2760     PetscScalar  *work;
2761     PetscReal    *singular_vals;
2762 #if defined(PETSC_USE_COMPLEX)
2763     PetscReal    *rwork;
2764 #endif
2765 #if defined(PETSC_MISSING_LAPACK_GESVD)
2766     PetscScalar  *temp_basis,*correlation_mat;
2767 #else
2768     PetscBLASInt dummy_int=1;
2769     PetscScalar  dummy_scalar=1.;
2770 #endif
2771 
2772     /* Get index sets for faces, edges and vertices from graph */
2773     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2774     /* free unneeded index sets */
2775     if (!pcbddc->use_vertices) {
2776       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2777     }
2778     if (!pcbddc->use_edges) {
2779       for (i=0;i<n_ISForEdges;i++) {
2780         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2781       }
2782       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2783       n_ISForEdges = 0;
2784     }
2785     if (!pcbddc->use_faces) {
2786       for (i=0;i<n_ISForFaces;i++) {
2787         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2788       }
2789       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2790       n_ISForFaces = 0;
2791     }
2792 
2793 #if defined(PETSC_USE_DEBUG)
2794     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2795        Also use_change_of_basis should be consistent among processors */
2796     if (pcbddc->NullSpace) {
2797       PetscBool tbool[2],gbool[2];
2798 
2799       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2800         pcbddc->use_change_of_basis = PETSC_TRUE;
2801         if (!ISForEdges) {
2802           pcbddc->use_change_on_faces = PETSC_TRUE;
2803         }
2804       }
2805       tbool[0] = pcbddc->use_change_of_basis;
2806       tbool[1] = pcbddc->use_change_on_faces;
2807       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2808       pcbddc->use_change_of_basis = gbool[0];
2809       pcbddc->use_change_on_faces = gbool[1];
2810     }
2811 #endif
2812 
2813     /* check if near null space is attached to global mat */
2814     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2815     if (nearnullsp) {
2816       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2817       /* remove any stored info */
2818       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2819       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2820       /* store information for BDDC solver reuse */
2821       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2822       pcbddc->onearnullspace = nearnullsp;
2823       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2824       for (i=0;i<nnsp_size;i++) {
2825         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2826       }
2827     } else { /* if near null space is not provided BDDC uses constants by default */
2828       nnsp_size = 0;
2829       nnsp_has_cnst = PETSC_TRUE;
2830     }
2831     /* get max number of constraints on a single cc */
2832     max_constraints = nnsp_size;
2833     if (nnsp_has_cnst) max_constraints++;
2834 
2835     /*
2836          Evaluate maximum storage size needed by the procedure
2837          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2838          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2839          There can be multiple constraints per connected component
2840                                                                                                                                                            */
2841     n_vertices = 0;
2842     if (ISForVertices) {
2843       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2844     }
2845     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2846     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2847 
2848     total_counts = n_ISForFaces+n_ISForEdges;
2849     total_counts *= max_constraints;
2850     total_counts += n_vertices;
2851     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2852 
2853     total_counts = 0;
2854     max_size_of_constraint = 0;
2855     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2856       IS used_is;
2857       if (i<n_ISForEdges) {
2858         used_is = ISForEdges[i];
2859       } else {
2860         used_is = ISForFaces[i-n_ISForEdges];
2861       }
2862       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2863       total_counts += j;
2864       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2865     }
2866     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2867 
2868     /* get local part of global near null space vectors */
2869     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2870     for (k=0;k<nnsp_size;k++) {
2871       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2872       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2873       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2874     }
2875 
2876     /* whether or not to skip lapack calls */
2877     skip_lapack = PETSC_TRUE;
2878     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2879 
2880     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2881     if (!skip_lapack) {
2882       PetscScalar temp_work;
2883 
2884 #if defined(PETSC_MISSING_LAPACK_GESVD)
2885       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2886       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2887       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2888       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2889 #if defined(PETSC_USE_COMPLEX)
2890       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2891 #endif
2892       /* now we evaluate the optimal workspace using query with lwork=-1 */
2893       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2894       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2895       lwork = -1;
2896       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2897 #if !defined(PETSC_USE_COMPLEX)
2898       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2899 #else
2900       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2901 #endif
2902       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2903       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2904 #else /* on missing GESVD */
2905       /* SVD */
2906       PetscInt max_n,min_n;
2907       max_n = max_size_of_constraint;
2908       min_n = max_constraints;
2909       if (max_size_of_constraint < max_constraints) {
2910         min_n = max_size_of_constraint;
2911         max_n = max_constraints;
2912       }
2913       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2914 #if defined(PETSC_USE_COMPLEX)
2915       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2916 #endif
2917       /* now we evaluate the optimal workspace using query with lwork=-1 */
2918       lwork = -1;
2919       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2920       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2921       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2922       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2923 #if !defined(PETSC_USE_COMPLEX)
2924       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2925 #else
2926       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2927 #endif
2928       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2929       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2930 #endif /* on missing GESVD */
2931       /* Allocate optimal workspace */
2932       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2933       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2934     }
2935     /* Now we can loop on constraining sets */
2936     total_counts = 0;
2937     constraints_idxs_ptr[0] = 0;
2938     constraints_data_ptr[0] = 0;
2939     /* vertices */
2940     if (n_vertices) {
2941       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2942       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2943       for (i=0;i<n_vertices;i++) {
2944         constraints_n[total_counts] = 1;
2945         constraints_data[total_counts] = 1.0;
2946         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2947         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2948         total_counts++;
2949       }
2950       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2951       n_vertices = total_counts;
2952     }
2953 
2954     /* edges and faces */
2955     total_counts_cc = total_counts;
2956     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2957       IS        used_is;
2958       PetscBool idxs_copied = PETSC_FALSE;
2959 
2960       if (ncc<n_ISForEdges) {
2961         used_is = ISForEdges[ncc];
2962         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2963       } else {
2964         used_is = ISForFaces[ncc-n_ISForEdges];
2965         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2966       }
2967       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2968 
2969       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2970       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2971       /* change of basis should not be performed on local periodic nodes */
2972       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2973       if (nnsp_has_cnst) {
2974         PetscScalar quad_value;
2975 
2976         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2977         idxs_copied = PETSC_TRUE;
2978 
2979         if (!pcbddc->use_nnsp_true) {
2980           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2981         } else {
2982           quad_value = 1.0;
2983         }
2984         for (j=0;j<size_of_constraint;j++) {
2985           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2986         }
2987         temp_constraints++;
2988         total_counts++;
2989       }
2990       for (k=0;k<nnsp_size;k++) {
2991         PetscReal real_value;
2992         PetscScalar *ptr_to_data;
2993 
2994         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2995         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2996         for (j=0;j<size_of_constraint;j++) {
2997           ptr_to_data[j] = array[is_indices[j]];
2998         }
2999         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
3000         /* check if array is null on the connected component */
3001         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3002         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
3003         if (real_value > 0.0) { /* keep indices and values */
3004           temp_constraints++;
3005           total_counts++;
3006           if (!idxs_copied) {
3007             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
3008             idxs_copied = PETSC_TRUE;
3009           }
3010         }
3011       }
3012       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3013       valid_constraints = temp_constraints;
3014       if (!pcbddc->use_nnsp_true && temp_constraints) {
3015         if (temp_constraints == 1) { /* just normalize the constraint */
3016           PetscScalar norm,*ptr_to_data;
3017 
3018           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
3019           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3020           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
3021           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
3022           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
3023         } else { /* perform SVD */
3024           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
3025           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
3026 
3027 #if defined(PETSC_MISSING_LAPACK_GESVD)
3028           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
3029              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
3030              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
3031                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
3032                 from that computed using LAPACKgesvd
3033              -> This is due to a different computation of eigenvectors in LAPACKheev
3034              -> The quality of the POD-computed basis will be the same */
3035           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
3036           /* Store upper triangular part of correlation matrix */
3037           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3038           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3039           for (j=0;j<temp_constraints;j++) {
3040             for (k=0;k<j+1;k++) {
3041               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
3042             }
3043           }
3044           /* compute eigenvalues and eigenvectors of correlation matrix */
3045           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3046           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
3047 #if !defined(PETSC_USE_COMPLEX)
3048           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
3049 #else
3050           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
3051 #endif
3052           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3053           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
3054           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
3055           j = 0;
3056           while (j < temp_constraints && singular_vals[j] < tol) j++;
3057           total_counts = total_counts-j;
3058           valid_constraints = temp_constraints-j;
3059           /* scale and copy POD basis into used quadrature memory */
3060           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3061           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3062           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
3063           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3064           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
3065           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3066           if (j<temp_constraints) {
3067             PetscInt ii;
3068             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
3069             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3070             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
3071             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3072             for (k=0;k<temp_constraints-j;k++) {
3073               for (ii=0;ii<size_of_constraint;ii++) {
3074                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
3075               }
3076             }
3077           }
3078 #else  /* on missing GESVD */
3079           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3080           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3081           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3082           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3083 #if !defined(PETSC_USE_COMPLEX)
3084           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
3085 #else
3086           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
3087 #endif
3088           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
3089           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3090           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
3091           k = temp_constraints;
3092           if (k > size_of_constraint) k = size_of_constraint;
3093           j = 0;
3094           while (j < k && singular_vals[k-j-1] < tol) j++;
3095           valid_constraints = k-j;
3096           total_counts = total_counts-temp_constraints+valid_constraints;
3097 #endif /* on missing GESVD */
3098         }
3099       }
3100       /* update pointers information */
3101       if (valid_constraints) {
3102         constraints_n[total_counts_cc] = valid_constraints;
3103         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
3104         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
3105         /* set change_of_basis flag */
3106         if (boolforchange) {
3107           PetscBTSet(change_basis,total_counts_cc);
3108         }
3109         total_counts_cc++;
3110       }
3111     }
3112     /* free workspace */
3113     if (!skip_lapack) {
3114       ierr = PetscFree(work);CHKERRQ(ierr);
3115 #if defined(PETSC_USE_COMPLEX)
3116       ierr = PetscFree(rwork);CHKERRQ(ierr);
3117 #endif
3118       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
3119 #if defined(PETSC_MISSING_LAPACK_GESVD)
3120       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
3121       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
3122 #endif
3123     }
3124     for (k=0;k<nnsp_size;k++) {
3125       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
3126     }
3127     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
3128     /* free index sets of faces, edges and vertices */
3129     for (i=0;i<n_ISForFaces;i++) {
3130       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
3131     }
3132     if (n_ISForFaces) {
3133       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
3134     }
3135     for (i=0;i<n_ISForEdges;i++) {
3136       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
3137     }
3138     if (n_ISForEdges) {
3139       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
3140     }
3141     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
3142   } else {
3143     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3144 
3145     total_counts = 0;
3146     n_vertices = 0;
3147     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3148       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
3149     }
3150     max_constraints = 0;
3151     total_counts_cc = 0;
3152     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3153       total_counts += pcbddc->adaptive_constraints_n[i];
3154       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
3155       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
3156     }
3157     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
3158     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
3159     constraints_idxs = pcbddc->adaptive_constraints_idxs;
3160     constraints_data = pcbddc->adaptive_constraints_data;
3161     /* constraints_n differs from pcbddc->adaptive_constraints_n */
3162     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
3163     total_counts_cc = 0;
3164     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3165       if (pcbddc->adaptive_constraints_n[i]) {
3166         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
3167       }
3168     }
3169 #if 0
3170     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
3171     for (i=0;i<total_counts_cc;i++) {
3172       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
3173       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
3174       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
3175         printf(" %d",constraints_idxs[j]);
3176       }
3177       printf("\n");
3178       printf("number of cc: %d\n",constraints_n[i]);
3179     }
3180     for (i=0;i<n_vertices;i++) {
3181       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
3182     }
3183     for (i=0;i<sub_schurs->n_subs;i++) {
3184       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
3185     }
3186 #endif
3187 
3188     max_size_of_constraint = 0;
3189     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
3190     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
3191     /* Change of basis */
3192     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
3193     if (pcbddc->use_change_of_basis) {
3194       for (i=0;i<sub_schurs->n_subs;i++) {
3195         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
3196           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
3197         }
3198       }
3199     }
3200   }
3201   pcbddc->local_primal_size = total_counts;
3202   ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3203 
3204   /* map constraints_idxs in boundary numbering */
3205   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
3206   if (i != constraints_idxs_ptr[total_counts_cc]) {
3207     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
3208   }
3209 
3210   /* Create constraint matrix */
3211   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
3212   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
3213   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
3214 
3215   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
3216   /* determine if a QR strategy is needed for change of basis */
3217   qr_needed = PETSC_FALSE;
3218   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
3219   total_primal_vertices=0;
3220   pcbddc->local_primal_size_cc = 0;
3221   for (i=0;i<total_counts_cc;i++) {
3222     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3223     if (size_of_constraint == 1) {
3224       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
3225       pcbddc->local_primal_size_cc += 1;
3226     } else if (PetscBTLookup(change_basis,i)) {
3227       for (k=0;k<constraints_n[i];k++) {
3228         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3229       }
3230       pcbddc->local_primal_size_cc += constraints_n[i];
3231       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
3232         PetscBTSet(qr_needed_idx,i);
3233         qr_needed = PETSC_TRUE;
3234       }
3235     } else {
3236       pcbddc->local_primal_size_cc += 1;
3237     }
3238   }
3239   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
3240   pcbddc->n_vertices = total_primal_vertices;
3241   /* permute indices in order to have a sorted set of vertices */
3242   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3243 
3244   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
3245   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
3246   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
3247 
3248   /* nonzero structure of constraint matrix */
3249   /* and get reference dof for local constraints */
3250   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
3251   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
3252 
3253   j = total_primal_vertices;
3254   total_counts = total_primal_vertices;
3255   cum = total_primal_vertices;
3256   for (i=n_vertices;i<total_counts_cc;i++) {
3257     if (!PetscBTLookup(change_basis,i)) {
3258       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
3259       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
3260       cum++;
3261       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3262       for (k=0;k<constraints_n[i];k++) {
3263         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3264         nnz[j+k] = size_of_constraint;
3265       }
3266       j += constraints_n[i];
3267     }
3268   }
3269   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
3270   ierr = PetscFree(nnz);CHKERRQ(ierr);
3271 
3272   /* set values in constraint matrix */
3273   for (i=0;i<total_primal_vertices;i++) {
3274     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3275   }
3276   total_counts = total_primal_vertices;
3277   for (i=n_vertices;i<total_counts_cc;i++) {
3278     if (!PetscBTLookup(change_basis,i)) {
3279       PetscInt *cols;
3280 
3281       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3282       cols = constraints_idxs+constraints_idxs_ptr[i];
3283       for (k=0;k<constraints_n[i];k++) {
3284         PetscInt    row = total_counts+k;
3285         PetscScalar *vals;
3286 
3287         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
3288         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3289       }
3290       total_counts += constraints_n[i];
3291     }
3292   }
3293   /* assembling */
3294   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3295   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3296 
3297   /*
3298   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3299   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
3300   */
3301   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
3302   if (pcbddc->use_change_of_basis) {
3303     /* dual and primal dofs on a single cc */
3304     PetscInt     dual_dofs,primal_dofs;
3305     /* working stuff for GEQRF */
3306     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
3307     PetscBLASInt lqr_work;
3308     /* working stuff for UNGQR */
3309     PetscScalar  *gqr_work,lgqr_work_t;
3310     PetscBLASInt lgqr_work;
3311     /* working stuff for TRTRS */
3312     PetscScalar  *trs_rhs;
3313     PetscBLASInt Blas_NRHS;
3314     /* pointers for values insertion into change of basis matrix */
3315     PetscInt     *start_rows,*start_cols;
3316     PetscScalar  *start_vals;
3317     /* working stuff for values insertion */
3318     PetscBT      is_primal;
3319     PetscInt     *aux_primal_numbering_B;
3320     /* matrix sizes */
3321     PetscInt     global_size,local_size;
3322     /* temporary change of basis */
3323     Mat          localChangeOfBasisMatrix;
3324     /* extra space for debugging */
3325     PetscScalar  *dbg_work;
3326 
3327     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
3328     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
3329     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3330     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
3331     /* nonzeros for local mat */
3332     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
3333     for (i=0;i<pcis->n;i++) nnz[i]=1;
3334     for (i=n_vertices;i<total_counts_cc;i++) {
3335       if (PetscBTLookup(change_basis,i)) {
3336         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3337         if (PetscBTLookup(qr_needed_idx,i)) {
3338           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
3339         } else {
3340           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
3341           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
3342         }
3343       }
3344     }
3345     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
3346     ierr = PetscFree(nnz);CHKERRQ(ierr);
3347     /* Set initial identity in the matrix */
3348     for (i=0;i<pcis->n;i++) {
3349       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
3350     }
3351 
3352     if (pcbddc->dbg_flag) {
3353       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
3354       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3355     }
3356 
3357 
3358     /* Now we loop on the constraints which need a change of basis */
3359     /*
3360        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
3361        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
3362 
3363        Basic blocks of change of basis matrix T computed by
3364 
3365           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
3366 
3367             | 1        0   ...        0         s_1/S |
3368             | 0        1   ...        0         s_2/S |
3369             |              ...                        |
3370             | 0        ...            1     s_{n-1}/S |
3371             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
3372 
3373             with S = \sum_{i=1}^n s_i^2
3374             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
3375                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
3376 
3377           - QR decomposition of constraints otherwise
3378     */
3379     if (qr_needed) {
3380       /* space to store Q */
3381       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
3382       /* first we issue queries for optimal work */
3383       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3384       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
3385       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3386       lqr_work = -1;
3387       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
3388       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3389       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3390       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3391       lgqr_work = -1;
3392       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3393       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3394       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3395       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3396       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3397       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3398       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3399       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3400       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3401       /* array to store scaling factors for reflectors */
3402       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3403       /* array to store rhs and solution of triangular solver */
3404       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3405       /* allocating workspace for check */
3406       if (pcbddc->dbg_flag) {
3407         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3408       }
3409     }
3410     /* array to store whether a node is primal or not */
3411     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3412     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3413     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3414     if (i != total_primal_vertices) {
3415       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3416     }
3417     for (i=0;i<total_primal_vertices;i++) {
3418       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3419     }
3420     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3421 
3422     /* loop on constraints and see whether or not they need a change of basis and compute it */
3423     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3424       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3425       if (PetscBTLookup(change_basis,total_counts)) {
3426         /* get constraint info */
3427         primal_dofs = constraints_n[total_counts];
3428         dual_dofs = size_of_constraint-primal_dofs;
3429 
3430         if (pcbddc->dbg_flag) {
3431           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3432         }
3433 
3434         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3435 
3436           /* copy quadrature constraints for change of basis check */
3437           if (pcbddc->dbg_flag) {
3438             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3439           }
3440           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3441           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3442 
3443           /* compute QR decomposition of constraints */
3444           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3445           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3446           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3447           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3448           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3449           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3450           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3451 
3452           /* explictly compute R^-T */
3453           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3454           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3455           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3456           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3457           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3458           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3459           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3460           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3461           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3462           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3463 
3464           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3465           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3466           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3467           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3468           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3469           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3470           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3471           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3472           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3473 
3474           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3475              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3476              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3477           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3478           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3479           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3480           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3481           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3482           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3483           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3484           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3485           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3486           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3487 
3488           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3489           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3490           /* insert cols for primal dofs */
3491           for (j=0;j<primal_dofs;j++) {
3492             start_vals = &qr_basis[j*size_of_constraint];
3493             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3494             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3495           }
3496           /* insert cols for dual dofs */
3497           for (j=0,k=0;j<dual_dofs;k++) {
3498             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3499               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3500               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3501               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3502               j++;
3503             }
3504           }
3505 
3506           /* check change of basis */
3507           if (pcbddc->dbg_flag) {
3508             PetscInt   ii,jj;
3509             PetscBool valid_qr=PETSC_TRUE;
3510             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3511             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3512             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3513             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3514             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3515             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3516             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3517             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3518             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3519             for (jj=0;jj<size_of_constraint;jj++) {
3520               for (ii=0;ii<primal_dofs;ii++) {
3521                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3522                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3523               }
3524             }
3525             if (!valid_qr) {
3526               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3527               for (jj=0;jj<size_of_constraint;jj++) {
3528                 for (ii=0;ii<primal_dofs;ii++) {
3529                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3530                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3531                   }
3532                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3533                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3534                   }
3535                 }
3536               }
3537             } else {
3538               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3539             }
3540           }
3541         } else { /* simple transformation block */
3542           PetscInt    row,col;
3543           PetscScalar val,norm;
3544 
3545           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3546           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3547           for (j=0;j<size_of_constraint;j++) {
3548             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3549             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3550             if (!PetscBTLookup(is_primal,row_B)) {
3551               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3552               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3553               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3554             } else {
3555               for (k=0;k<size_of_constraint;k++) {
3556                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3557                 if (row != col) {
3558                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3559                 } else {
3560                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3561                 }
3562                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3563               }
3564             }
3565           }
3566           if (pcbddc->dbg_flag) {
3567             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3568           }
3569         }
3570       } else {
3571         if (pcbddc->dbg_flag) {
3572           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3573         }
3574       }
3575     }
3576 
3577     /* free workspace */
3578     if (qr_needed) {
3579       if (pcbddc->dbg_flag) {
3580         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3581       }
3582       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3583       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3584       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3585       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3586       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3587     }
3588     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3589     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3590     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3591 
3592     /* assembling of global change of variable */
3593     {
3594       Mat      tmat;
3595       PetscInt bs;
3596 
3597       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3598       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3599       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3600       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3601       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3602       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3603       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3604       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3605       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3606       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3607       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3608       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3609       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3610       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3611       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3612       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3613       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3614       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3615     }
3616     /* check */
3617     if (pcbddc->dbg_flag) {
3618       PetscReal error;
3619       Vec       x,x_change;
3620 
3621       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3622       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3623       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3624       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3625       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3626       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3627       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3628       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3629       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3630       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3631       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3632       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3633       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3634       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3635       ierr = VecDestroy(&x);CHKERRQ(ierr);
3636       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3637     }
3638 
3639     /* adapt sub_schurs computed (if any) */
3640     if (pcbddc->use_deluxe_scaling) {
3641       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3642       if (sub_schurs->S_Ej_all) {
3643         Mat                    S_new,tmat;
3644         ISLocalToGlobalMapping NtoSall;
3645         IS                     is_all_N,is_V,is_V_Sall;
3646         const PetscScalar      *array;
3647         const PetscInt         *idxs_V,*idxs_all;
3648         PetscInt               i,n_V;
3649 
3650         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3651         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3652         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3653         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3654         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3655         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3656         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3657         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3658         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3659         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3660         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3661         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3662         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3663         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3664         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3665         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3666         for (i=0;i<n_V;i++) {
3667           PetscScalar val;
3668           PetscInt    idx;
3669 
3670           idx = idxs_V[i];
3671           val = array[idxs_all[idxs_V[i]]];
3672           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3673         }
3674         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3675         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3676         sub_schurs->S_Ej_all = S_new;
3677         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3678         if (sub_schurs->sum_S_Ej_all) {
3679           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3680           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3681           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3682           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3683           sub_schurs->sum_S_Ej_all = S_new;
3684           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3685         }
3686         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3687         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3688         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3689         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3690         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3691       }
3692     }
3693     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3694   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3695     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3696     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3697   }
3698 
3699   /* set up change of basis context */
3700   if (pcbddc->ChangeOfBasisMatrix) {
3701     PCBDDCChange_ctx change_ctx;
3702 
3703     if (!pcbddc->new_global_mat) {
3704       PetscInt global_size,local_size;
3705 
3706       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3707       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3708       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3709       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3710       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3711       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3712       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3713       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3714       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3715     } else {
3716       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3717       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3718       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3719     }
3720     if (!pcbddc->user_ChangeOfBasisMatrix) {
3721       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3722       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3723     } else {
3724       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3725       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3726     }
3727     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3728     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3729     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3730     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3731   } else {
3732     ierr = MatDestroy(&pcbddc->new_global_mat);CHKERRQ(ierr);
3733   }
3734 
3735   /* add pressure dofs to set of primal nodes for numbering purposes */
3736   for (i=0;i<pcbddc->benign_n;i++) {
3737     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
3738     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
3739     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3740     pcbddc->local_primal_size_cc++;
3741     pcbddc->local_primal_size++;
3742   }
3743 
3744   /* check if a new primal space has been introduced (also take into account benign trick) */
3745   pcbddc->new_primal_space_local = PETSC_TRUE;
3746   if (olocal_primal_size == pcbddc->local_primal_size) {
3747     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3748     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3749     if (!pcbddc->new_primal_space_local) {
3750       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3751       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3752     }
3753   }
3754   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3755   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3756   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3757 
3758   /* flush dbg viewer */
3759   if (pcbddc->dbg_flag) {
3760     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3761   }
3762 
3763   /* free workspace */
3764   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3765   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3766   if (!pcbddc->adaptive_selection) {
3767     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3768     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3769   } else {
3770     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3771                       pcbddc->adaptive_constraints_idxs_ptr,
3772                       pcbddc->adaptive_constraints_data_ptr,
3773                       pcbddc->adaptive_constraints_idxs,
3774                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3775     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3776     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3777   }
3778   PetscFunctionReturn(0);
3779 }
3780 
3781 #undef __FUNCT__
3782 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3783 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3784 {
3785   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3786   PC_IS       *pcis = (PC_IS*)pc->data;
3787   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3788   PetscInt    ierr,i,vertex_size,N;
3789   PetscViewer viewer=pcbddc->dbg_viewer;
3790 
3791   PetscFunctionBegin;
3792   /* Reset previously computed graph */
3793   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3794   /* Init local Graph struct */
3795   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3796   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3797 
3798   /* Check validity of the csr graph passed in by the user */
3799   if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3800     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
3801   }
3802 
3803   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3804   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3805     PetscInt  *xadj,*adjncy;
3806     PetscInt  nvtxs;
3807     PetscBool flg_row=PETSC_FALSE;
3808 
3809     if (pcbddc->use_local_adj) {
3810 
3811       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3812       if (flg_row) {
3813         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3814         pcbddc->computed_rowadj = PETSC_TRUE;
3815       }
3816       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3817     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3818       IS                     is_dummy;
3819       ISLocalToGlobalMapping l2gmap_dummy;
3820       PetscInt               j,sum;
3821       PetscInt               *cxadj,*cadjncy;
3822       const PetscInt         *idxs;
3823       PCBDDCGraph            graph;
3824       PetscBT                is_on_boundary;
3825 
3826       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3827       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3828       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3829       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3830       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3831       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3832       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3833       if (flg_row) {
3834         graph->xadj = xadj;
3835         graph->adjncy = adjncy;
3836       }
3837       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3838       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3839       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3840 
3841       if (pcbddc->dbg_flag) {
3842         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3843         for (i=0;i<graph->ncc;i++) {
3844           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3845         }
3846       }
3847 
3848       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3849       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3850       for (i=0;i<pcis->n_B;i++) {
3851         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3852       }
3853       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3854 
3855       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3856       sum = 0;
3857       for (i=0;i<graph->ncc;i++) {
3858         PetscInt sizecc = 0;
3859         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3860           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3861             sizecc++;
3862           }
3863         }
3864         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3865           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3866             cxadj[graph->queue[j]] = sizecc;
3867           }
3868         }
3869         sum += sizecc*sizecc;
3870       }
3871       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3872       sum = 0;
3873       for (i=0;i<pcis->n;i++) {
3874         PetscInt temp = cxadj[i];
3875         cxadj[i] = sum;
3876         sum += temp;
3877       }
3878       cxadj[pcis->n] = sum;
3879       for (i=0;i<graph->ncc;i++) {
3880         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3881           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3882             PetscInt k,sizecc = 0;
3883             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3884               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3885                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3886                 sizecc++;
3887               }
3888             }
3889           }
3890         }
3891       }
3892       if (sum) {
3893         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3894       } else {
3895         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3896         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3897       }
3898       graph->xadj = 0;
3899       graph->adjncy = 0;
3900       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3901       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3902     }
3903   }
3904   if (pcbddc->dbg_flag) {
3905     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3906   }
3907 
3908   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3909   vertex_size = 1;
3910   if (pcbddc->user_provided_isfordofs) {
3911     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3912       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3913       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3914         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3915         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3916       }
3917       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3918       pcbddc->n_ISForDofs = 0;
3919       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3920     }
3921     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3922     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3923   } else {
3924     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3925       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3926       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3927       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3928         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3929       }
3930     }
3931   }
3932 
3933   /* Setup of Graph */
3934   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3935     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3936   }
3937   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3938     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3939   }
3940   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */
3941     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
3942   }
3943   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
3944 
3945   /* attach info on disconnected subdomains if present */
3946   if (pcbddc->n_local_subs) {
3947     PetscInt *local_subs;
3948 
3949     ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr);
3950     for (i=0;i<pcbddc->n_local_subs;i++) {
3951       const PetscInt *idxs;
3952       PetscInt       nl,j;
3953 
3954       ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr);
3955       ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3956       for (j=0;j<nl;j++) {
3957         local_subs[idxs[j]] = i;
3958       }
3959       ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3960     }
3961     pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
3962     pcbddc->mat_graph->local_subs = local_subs;
3963   }
3964 
3965   /* Graph's connected components analysis */
3966   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3967 
3968   /* print some info to stdout */
3969   if (pcbddc->dbg_flag) {
3970     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3971   }
3972 
3973   /* mark topography has done */
3974   pcbddc->recompute_topography = PETSC_FALSE;
3975   PetscFunctionReturn(0);
3976 }
3977 
3978 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3979 #undef __FUNCT__
3980 #define __FUNCT__ "PCBDDCSubsetNumbering"
3981 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3982 {
3983   PetscSF        sf;
3984   PetscLayout    map;
3985   const PetscInt *idxs;
3986   PetscInt       *leaf_data,*root_data,*gidxs;
3987   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3988   PetscInt       n_n,nlocals,start,first_index;
3989   PetscMPIInt    commsize;
3990   PetscBool      first_found;
3991   PetscErrorCode ierr;
3992 
3993   PetscFunctionBegin;
3994   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3995   if (subset_mult) {
3996     PetscCheckSameComm(subset,1,subset_mult,2);
3997     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3998     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3999   }
4000   /* create workspace layout for computing global indices of subset */
4001   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
4002   lbounds[0] = lbounds[1] = 0;
4003   for (i=0;i<n;i++) {
4004     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
4005     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
4006   }
4007   lbounds[0] = -lbounds[0];
4008   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4009   gbounds[0] = -gbounds[0];
4010   N = gbounds[1] - gbounds[0] + 1;
4011   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
4012   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
4013   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
4014   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
4015   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
4016 
4017   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
4018   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
4019   if (subset_mult) {
4020     const PetscInt* idxs_mult;
4021 
4022     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4023     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
4024     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4025   } else {
4026     for (i=0;i<n;i++) leaf_data[i] = 1;
4027   }
4028   /* local size of new subset */
4029   n_n = 0;
4030   for (i=0;i<n;i++) n_n += leaf_data[i];
4031 
4032   /* global indexes in layout */
4033   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
4034   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
4035   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
4036   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
4037   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
4038   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
4039 
4040   /* reduce from leaves to roots */
4041   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
4042   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4043   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4044 
4045   /* count indexes in local part of layout */
4046   nlocals = 0;
4047   first_index = -1;
4048   first_found = PETSC_FALSE;
4049   for (i=0;i<Nl;i++) {
4050     if (!first_found && root_data[i]) {
4051       first_found = PETSC_TRUE;
4052       first_index = i;
4053     }
4054     nlocals += root_data[i];
4055   }
4056 
4057   /* cumulative of number of indexes and size of subset without holes */
4058 #if defined(PETSC_HAVE_MPI_EXSCAN)
4059   start = 0;
4060   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4061 #else
4062   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4063   start = start-nlocals;
4064 #endif
4065 
4066   if (N_n) { /* compute total size of new subset if requested */
4067     *N_n = start + nlocals;
4068     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
4069     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4070   }
4071 
4072   /* adapt root data with cumulative */
4073   if (first_found) {
4074     PetscInt old_index;
4075 
4076     root_data[first_index] += start;
4077     old_index = first_index;
4078     for (i=first_index+1;i<Nl;i++) {
4079       if (root_data[i]) {
4080         root_data[i] += root_data[old_index];
4081         old_index = i;
4082       }
4083     }
4084   }
4085 
4086   /* from roots to leaves */
4087   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4088   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4089   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
4090 
4091   /* create new IS with global indexes without holes */
4092   if (subset_mult) {
4093     const PetscInt* idxs_mult;
4094     PetscInt        cum;
4095 
4096     cum = 0;
4097     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4098     for (i=0;i<n;i++) {
4099       PetscInt j;
4100       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
4101     }
4102     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4103   } else {
4104     for (i=0;i<n;i++) {
4105       gidxs[i] = leaf_data[i]-1;
4106     }
4107   }
4108   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
4109   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
4110   PetscFunctionReturn(0);
4111 }
4112 
4113 #undef __FUNCT__
4114 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
4115 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
4116 {
4117   PetscInt       i,j;
4118   PetscScalar    *alphas;
4119   PetscErrorCode ierr;
4120 
4121   PetscFunctionBegin;
4122   /* this implements stabilized Gram-Schmidt */
4123   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
4124   for (i=0;i<n;i++) {
4125     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
4126     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
4127     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
4128   }
4129   ierr = PetscFree(alphas);CHKERRQ(ierr);
4130   PetscFunctionReturn(0);
4131 }
4132 
4133 #undef __FUNCT__
4134 #define __FUNCT__ "MatISGetSubassemblingPattern"
4135 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
4136 {
4137   IS             ranks_send_to;
4138   PetscInt       n_neighs,*neighs,*n_shared,**shared;
4139   PetscMPIInt    size,rank,color;
4140   PetscInt       *xadj,*adjncy;
4141   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
4142   PetscInt       i,local_size,threshold=0;
4143   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
4144   PetscSubcomm   subcomm;
4145   PetscErrorCode ierr;
4146 
4147   PetscFunctionBegin;
4148   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
4149   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
4150   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
4151 
4152   /* Get info on mapping */
4153   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
4154   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4155 
4156   /* build local CSR graph of subdomains' connectivity */
4157   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
4158   xadj[0] = 0;
4159   xadj[1] = PetscMax(n_neighs-1,0);
4160   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
4161   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
4162 
4163   if (threshold) {
4164     PetscInt xadj_count = 0;
4165     for (i=1;i<n_neighs;i++) {
4166       if (n_shared[i] > threshold) {
4167         adjncy[xadj_count] = neighs[i];
4168         adjncy_wgt[xadj_count] = n_shared[i];
4169         xadj_count++;
4170       }
4171     }
4172     xadj[1] = xadj_count;
4173   } else {
4174     if (xadj[1]) {
4175       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
4176       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
4177     }
4178   }
4179   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4180   if (use_square) {
4181     for (i=0;i<xadj[1];i++) {
4182       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
4183     }
4184   }
4185   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4186 
4187   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
4188 
4189   /*
4190     Restrict work on active processes only.
4191   */
4192   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
4193   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
4194   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
4195   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
4196   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4197   if (color) {
4198     ierr = PetscFree(xadj);CHKERRQ(ierr);
4199     ierr = PetscFree(adjncy);CHKERRQ(ierr);
4200     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4201   } else {
4202     Mat             subdomain_adj;
4203     IS              new_ranks,new_ranks_contig;
4204     MatPartitioning partitioner;
4205     PetscInt        prank,rstart=0,rend=0;
4206     PetscInt        *is_indices,*oldranks;
4207     PetscBool       aggregate;
4208 
4209     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
4210     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
4211     prank = rank;
4212     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
4213     /*
4214     for (i=0;i<size;i++) {
4215       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
4216     }
4217     */
4218     for (i=0;i<xadj[1];i++) {
4219       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
4220     }
4221     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4222     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
4223     if (aggregate) {
4224       PetscInt    lrows,row,ncols,*cols;
4225       PetscMPIInt nrank;
4226       PetscScalar *vals;
4227 
4228       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
4229       lrows = 0;
4230       if (nrank<redprocs) {
4231         lrows = size/redprocs;
4232         if (nrank<size%redprocs) lrows++;
4233       }
4234       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
4235       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
4236       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4237       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4238       row = nrank;
4239       ncols = xadj[1]-xadj[0];
4240       cols = adjncy;
4241       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
4242       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
4243       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4244       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4245       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4246       ierr = PetscFree(xadj);CHKERRQ(ierr);
4247       ierr = PetscFree(adjncy);CHKERRQ(ierr);
4248       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4249       ierr = PetscFree(vals);CHKERRQ(ierr);
4250     } else {
4251       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
4252     }
4253     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
4254 
4255     /* Partition */
4256     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
4257     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
4258     if (use_vwgt) {
4259       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
4260       v_wgt[0] = local_size;
4261       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
4262     }
4263     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
4264     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
4265     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
4266     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
4267     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
4268 
4269     /* renumber new_ranks to avoid "holes" in new set of processors */
4270     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
4271     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
4272     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4273     if (!redprocs) {
4274       ranks_send_to_idx[0] = oldranks[is_indices[0]];
4275     } else {
4276       PetscInt    idxs[1];
4277       PetscMPIInt tag;
4278       MPI_Request *reqs;
4279 
4280       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
4281       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
4282       for (i=rstart;i<rend;i++) {
4283         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
4284       }
4285       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
4286       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4287       ierr = PetscFree(reqs);CHKERRQ(ierr);
4288       ranks_send_to_idx[0] = oldranks[idxs[0]];
4289     }
4290     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4291     /* clean up */
4292     ierr = PetscFree(oldranks);CHKERRQ(ierr);
4293     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
4294     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
4295     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
4296   }
4297   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4298 
4299   /* assemble parallel IS for sends */
4300   i = 1;
4301   if (color) i=0;
4302   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
4303   /* get back IS */
4304   *is_sends = ranks_send_to;
4305   PetscFunctionReturn(0);
4306 }
4307 
4308 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
4309 
4310 #undef __FUNCT__
4311 #define __FUNCT__ "MatISSubassemble"
4312 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
4313 {
4314   Mat                    local_mat;
4315   IS                     is_sends_internal;
4316   PetscInt               rows,cols,new_local_rows;
4317   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
4318   PetscBool              ismatis,isdense,newisdense,destroy_mat;
4319   ISLocalToGlobalMapping l2gmap;
4320   PetscInt*              l2gmap_indices;
4321   const PetscInt*        is_indices;
4322   MatType                new_local_type;
4323   /* buffers */
4324   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
4325   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
4326   PetscInt               *recv_buffer_idxs_local;
4327   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
4328   /* MPI */
4329   MPI_Comm               comm,comm_n;
4330   PetscSubcomm           subcomm;
4331   PetscMPIInt            n_sends,n_recvs,commsize;
4332   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
4333   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
4334   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
4335   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
4336   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
4337   PetscErrorCode         ierr;
4338 
4339   PetscFunctionBegin;
4340   /* TODO: add missing checks */
4341   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
4342   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
4343   PetscValidLogicalCollectiveEnum(mat,reuse,5);
4344   PetscValidLogicalCollectiveInt(mat,nis,7);
4345   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
4346   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
4347   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4348   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
4349   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
4350   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
4351   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
4352   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
4353     PetscInt mrows,mcols,mnrows,mncols;
4354     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
4355     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
4356     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
4357     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
4358     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
4359     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
4360   }
4361   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
4362   PetscValidLogicalCollectiveInt(mat,bs,0);
4363   /* prepare IS for sending if not provided */
4364   if (!is_sends) {
4365     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
4366     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
4367   } else {
4368     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
4369     is_sends_internal = is_sends;
4370   }
4371 
4372   /* get comm */
4373   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
4374 
4375   /* compute number of sends */
4376   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
4377   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
4378 
4379   /* compute number of receives */
4380   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
4381   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
4382   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
4383   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4384   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
4385   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
4386   ierr = PetscFree(iflags);CHKERRQ(ierr);
4387 
4388   /* restrict comm if requested */
4389   subcomm = 0;
4390   destroy_mat = PETSC_FALSE;
4391   if (restrict_comm) {
4392     PetscMPIInt color,subcommsize;
4393 
4394     color = 0;
4395     if (restrict_full) {
4396       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
4397     } else {
4398       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
4399     }
4400     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
4401     subcommsize = commsize - subcommsize;
4402     /* check if reuse has been requested */
4403     if (reuse == MAT_REUSE_MATRIX) {
4404       if (*mat_n) {
4405         PetscMPIInt subcommsize2;
4406         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
4407         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
4408         comm_n = PetscObjectComm((PetscObject)*mat_n);
4409       } else {
4410         comm_n = PETSC_COMM_SELF;
4411       }
4412     } else { /* MAT_INITIAL_MATRIX */
4413       PetscMPIInt rank;
4414 
4415       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4416       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4417       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4418       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4419       comm_n = PetscSubcommChild(subcomm);
4420     }
4421     /* flag to destroy *mat_n if not significative */
4422     if (color) destroy_mat = PETSC_TRUE;
4423   } else {
4424     comm_n = comm;
4425   }
4426 
4427   /* prepare send/receive buffers */
4428   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4429   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4430   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4431   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4432   if (nis) {
4433     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4434   }
4435 
4436   /* Get data from local matrices */
4437   if (!isdense) {
4438     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4439     /* TODO: See below some guidelines on how to prepare the local buffers */
4440     /*
4441        send_buffer_vals should contain the raw values of the local matrix
4442        send_buffer_idxs should contain:
4443        - MatType_PRIVATE type
4444        - PetscInt        size_of_l2gmap
4445        - PetscInt        global_row_indices[size_of_l2gmap]
4446        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4447     */
4448   } else {
4449     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4450     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4451     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4452     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4453     send_buffer_idxs[1] = i;
4454     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4455     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4456     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4457     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4458     for (i=0;i<n_sends;i++) {
4459       ilengths_vals[is_indices[i]] = len*len;
4460       ilengths_idxs[is_indices[i]] = len+2;
4461     }
4462   }
4463   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4464   /* additional is (if any) */
4465   if (nis) {
4466     PetscMPIInt psum;
4467     PetscInt j;
4468     for (j=0,psum=0;j<nis;j++) {
4469       PetscInt plen;
4470       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4471       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4472       psum += len+1; /* indices + lenght */
4473     }
4474     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4475     for (j=0,psum=0;j<nis;j++) {
4476       PetscInt plen;
4477       const PetscInt *is_array_idxs;
4478       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4479       send_buffer_idxs_is[psum] = plen;
4480       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4481       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4482       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4483       psum += plen+1; /* indices + lenght */
4484     }
4485     for (i=0;i<n_sends;i++) {
4486       ilengths_idxs_is[is_indices[i]] = psum;
4487     }
4488     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4489   }
4490 
4491   buf_size_idxs = 0;
4492   buf_size_vals = 0;
4493   buf_size_idxs_is = 0;
4494   for (i=0;i<n_recvs;i++) {
4495     buf_size_idxs += (PetscInt)olengths_idxs[i];
4496     buf_size_vals += (PetscInt)olengths_vals[i];
4497     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4498   }
4499   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4500   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4501   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4502 
4503   /* get new tags for clean communications */
4504   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4505   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4506   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4507 
4508   /* allocate for requests */
4509   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4510   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4511   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4512   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4513   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4514   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4515 
4516   /* communications */
4517   ptr_idxs = recv_buffer_idxs;
4518   ptr_vals = recv_buffer_vals;
4519   ptr_idxs_is = recv_buffer_idxs_is;
4520   for (i=0;i<n_recvs;i++) {
4521     source_dest = onodes[i];
4522     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4523     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4524     ptr_idxs += olengths_idxs[i];
4525     ptr_vals += olengths_vals[i];
4526     if (nis) {
4527       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4528       ptr_idxs_is += olengths_idxs_is[i];
4529     }
4530   }
4531   for (i=0;i<n_sends;i++) {
4532     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4533     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4534     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4535     if (nis) {
4536       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4537     }
4538   }
4539   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4540   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4541 
4542   /* assemble new l2g map */
4543   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4544   ptr_idxs = recv_buffer_idxs;
4545   new_local_rows = 0;
4546   for (i=0;i<n_recvs;i++) {
4547     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4548     ptr_idxs += olengths_idxs[i];
4549   }
4550   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4551   ptr_idxs = recv_buffer_idxs;
4552   new_local_rows = 0;
4553   for (i=0;i<n_recvs;i++) {
4554     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4555     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4556     ptr_idxs += olengths_idxs[i];
4557   }
4558   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4559   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4560   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4561 
4562   /* infer new local matrix type from received local matrices type */
4563   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4564   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4565   if (n_recvs) {
4566     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4567     ptr_idxs = recv_buffer_idxs;
4568     for (i=0;i<n_recvs;i++) {
4569       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4570         new_local_type_private = MATAIJ_PRIVATE;
4571         break;
4572       }
4573       ptr_idxs += olengths_idxs[i];
4574     }
4575     switch (new_local_type_private) {
4576       case MATDENSE_PRIVATE:
4577         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4578           new_local_type = MATSEQAIJ;
4579           bs = 1;
4580         } else { /* if I receive only 1 dense matrix */
4581           new_local_type = MATSEQDENSE;
4582           bs = 1;
4583         }
4584         break;
4585       case MATAIJ_PRIVATE:
4586         new_local_type = MATSEQAIJ;
4587         bs = 1;
4588         break;
4589       case MATBAIJ_PRIVATE:
4590         new_local_type = MATSEQBAIJ;
4591         break;
4592       case MATSBAIJ_PRIVATE:
4593         new_local_type = MATSEQSBAIJ;
4594         break;
4595       default:
4596         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4597         break;
4598     }
4599   } else { /* by default, new_local_type is seqdense */
4600     new_local_type = MATSEQDENSE;
4601     bs = 1;
4602   }
4603 
4604   /* create MATIS object if needed */
4605   if (reuse == MAT_INITIAL_MATRIX) {
4606     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4607     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4608   } else {
4609     /* it also destroys the local matrices */
4610     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4611   }
4612   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4613   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4614 
4615   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4616 
4617   /* Global to local map of received indices */
4618   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4619   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4620   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4621 
4622   /* restore attributes -> type of incoming data and its size */
4623   buf_size_idxs = 0;
4624   for (i=0;i<n_recvs;i++) {
4625     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4626     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4627     buf_size_idxs += (PetscInt)olengths_idxs[i];
4628   }
4629   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4630 
4631   /* set preallocation */
4632   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4633   if (!newisdense) {
4634     PetscInt *new_local_nnz=0;
4635 
4636     ptr_vals = recv_buffer_vals;
4637     ptr_idxs = recv_buffer_idxs_local;
4638     if (n_recvs) {
4639       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4640     }
4641     for (i=0;i<n_recvs;i++) {
4642       PetscInt j;
4643       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4644         for (j=0;j<*(ptr_idxs+1);j++) {
4645           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4646         }
4647       } else {
4648         /* TODO */
4649       }
4650       ptr_idxs += olengths_idxs[i];
4651     }
4652     if (new_local_nnz) {
4653       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4654       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4655       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4656       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4657       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4658       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4659     } else {
4660       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4661     }
4662     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4663   } else {
4664     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4665   }
4666 
4667   /* set values */
4668   ptr_vals = recv_buffer_vals;
4669   ptr_idxs = recv_buffer_idxs_local;
4670   for (i=0;i<n_recvs;i++) {
4671     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4672       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4673       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4674       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4675       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4676       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4677     } else {
4678       /* TODO */
4679     }
4680     ptr_idxs += olengths_idxs[i];
4681     ptr_vals += olengths_vals[i];
4682   }
4683   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4684   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4685   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4686   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4687   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4688   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4689 
4690 #if 0
4691   if (!restrict_comm) { /* check */
4692     Vec       lvec,rvec;
4693     PetscReal infty_error;
4694 
4695     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4696     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4697     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4698     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4699     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4700     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4701     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4702     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4703     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4704   }
4705 #endif
4706 
4707   /* assemble new additional is (if any) */
4708   if (nis) {
4709     PetscInt **temp_idxs,*count_is,j,psum;
4710 
4711     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4712     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4713     ptr_idxs = recv_buffer_idxs_is;
4714     psum = 0;
4715     for (i=0;i<n_recvs;i++) {
4716       for (j=0;j<nis;j++) {
4717         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4718         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4719         psum += plen;
4720         ptr_idxs += plen+1; /* shift pointer to received data */
4721       }
4722     }
4723     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4724     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4725     for (i=1;i<nis;i++) {
4726       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4727     }
4728     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4729     ptr_idxs = recv_buffer_idxs_is;
4730     for (i=0;i<n_recvs;i++) {
4731       for (j=0;j<nis;j++) {
4732         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4733         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4734         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4735         ptr_idxs += plen+1; /* shift pointer to received data */
4736       }
4737     }
4738     for (i=0;i<nis;i++) {
4739       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4740       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4741       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4742     }
4743     ierr = PetscFree(count_is);CHKERRQ(ierr);
4744     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4745     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4746   }
4747   /* free workspace */
4748   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4749   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4750   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4751   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4752   if (isdense) {
4753     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4754     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4755   } else {
4756     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4757   }
4758   if (nis) {
4759     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4760     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4761   }
4762   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4763   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4764   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4765   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4766   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4767   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4768   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4769   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4770   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4771   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4772   ierr = PetscFree(onodes);CHKERRQ(ierr);
4773   if (nis) {
4774     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4775     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4776     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4777   }
4778   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4779   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4780     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4781     for (i=0;i<nis;i++) {
4782       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4783     }
4784     *mat_n = NULL;
4785   }
4786   PetscFunctionReturn(0);
4787 }
4788 
4789 /* temporary hack into ksp private data structure */
4790 #include <petsc/private/kspimpl.h>
4791 
4792 #undef __FUNCT__
4793 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4794 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4795 {
4796   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4797   PC_IS                  *pcis = (PC_IS*)pc->data;
4798   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4799   MatNullSpace           CoarseNullSpace=NULL;
4800   ISLocalToGlobalMapping coarse_islg;
4801   IS                     coarse_is,*isarray;
4802   PetscInt               i,im_active=-1,active_procs=-1;
4803   PetscInt               nis,nisdofs,nisneu,nisvert;
4804   PC                     pc_temp;
4805   PCType                 coarse_pc_type;
4806   KSPType                coarse_ksp_type;
4807   PetscBool              multilevel_requested,multilevel_allowed;
4808   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4809   Mat                    t_coarse_mat_is;
4810   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4811   PetscMPIInt            all_procs;
4812   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4813   PetscBool              compute_vecs = PETSC_FALSE;
4814   PetscScalar            *array;
4815   PetscErrorCode         ierr;
4816 
4817   PetscFunctionBegin;
4818   /* Assign global numbering to coarse dofs */
4819   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4820     PetscInt ocoarse_size;
4821     compute_vecs = PETSC_TRUE;
4822     ocoarse_size = pcbddc->coarse_size;
4823     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4824     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4825     /* see if we can avoid some work */
4826     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4827       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4828       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4829         PC        pc;
4830         PetscBool isbddc;
4831 
4832         /* temporary workaround since PCBDDC does not have a reset method so far */
4833         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4834         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4835         if (isbddc) {
4836           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4837         }
4838         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4839         coarse_reuse = PETSC_FALSE;
4840       } else { /* we can safely reuse already computed coarse matrix */
4841         coarse_reuse = PETSC_TRUE;
4842       }
4843     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4844       coarse_reuse = PETSC_FALSE;
4845     }
4846     /* reset any subassembling information */
4847     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4848     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4849   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4850     coarse_reuse = PETSC_TRUE;
4851   }
4852 
4853   /* count "active" (i.e. with positive local size) and "void" processes */
4854   im_active = !!(pcis->n);
4855   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4856   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4857   void_procs = all_procs-active_procs;
4858   csin_type_simple = PETSC_TRUE;
4859   redist = PETSC_FALSE;
4860   if (pcbddc->current_level && void_procs) {
4861     csin_ml = PETSC_TRUE;
4862     ncoarse_ml = void_procs;
4863     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4864     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4865       csin_ds = PETSC_TRUE;
4866       ncoarse_ds = pcbddc->redistribute_coarse;
4867       redist = PETSC_TRUE;
4868     } else {
4869       csin_ds = PETSC_TRUE;
4870       ncoarse_ds = active_procs;
4871       redist = PETSC_TRUE;
4872     }
4873   } else {
4874     csin_ml = PETSC_FALSE;
4875     ncoarse_ml = all_procs;
4876     if (void_procs) {
4877       csin_ds = PETSC_TRUE;
4878       ncoarse_ds = void_procs;
4879       csin_type_simple = PETSC_FALSE;
4880     } else {
4881       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4882         csin_ds = PETSC_TRUE;
4883         ncoarse_ds = pcbddc->redistribute_coarse;
4884         redist = PETSC_TRUE;
4885       } else {
4886         csin_ds = PETSC_FALSE;
4887         ncoarse_ds = all_procs;
4888       }
4889     }
4890   }
4891 
4892   /*
4893     test if we can go multilevel: three conditions must be satisfied:
4894     - we have not exceeded the number of levels requested
4895     - we can actually subassemble the active processes
4896     - we can find a suitable number of MPI processes where we can place the subassembled problem
4897   */
4898   multilevel_allowed = PETSC_FALSE;
4899   multilevel_requested = PETSC_FALSE;
4900   if (pcbddc->current_level < pcbddc->max_levels) {
4901     multilevel_requested = PETSC_TRUE;
4902     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4903       multilevel_allowed = PETSC_FALSE;
4904     } else {
4905       multilevel_allowed = PETSC_TRUE;
4906     }
4907   }
4908   /* determine number of process partecipating to coarse solver */
4909   if (multilevel_allowed) {
4910     ncoarse = ncoarse_ml;
4911     csin = csin_ml;
4912     redist = PETSC_FALSE;
4913   } else {
4914     ncoarse = ncoarse_ds;
4915     csin = csin_ds;
4916   }
4917 
4918   /* creates temporary l2gmap and IS for coarse indexes */
4919   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4920   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4921 
4922   /* creates temporary MATIS object for coarse matrix */
4923   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4924   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4925   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4926   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4927   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4928   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4929   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4930   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4931   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4932 
4933   /* compute dofs splitting and neumann boundaries for coarse dofs */
4934   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */
4935     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4936     const PetscInt         *idxs;
4937     ISLocalToGlobalMapping tmap;
4938 
4939     /* create map between primal indices (in local representative ordering) and local primal numbering */
4940     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4941     /* allocate space for temporary storage */
4942     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4943     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4944     /* allocate for IS array */
4945     nisdofs = pcbddc->n_ISForDofsLocal;
4946     nisneu = !!pcbddc->NeumannBoundariesLocal;
4947     nisvert = 0;
4948     if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) {
4949       nisvert = 1;
4950     }
4951     nis = nisdofs + nisneu + nisvert;
4952     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4953     /* dofs splitting */
4954     for (i=0;i<nisdofs;i++) {
4955       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4956       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4957       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4958       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4959       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4960       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4961       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4962       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4963     }
4964     /* neumann boundaries */
4965     if (pcbddc->NeumannBoundariesLocal) {
4966       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4967       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4968       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4969       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4970       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4971       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4972       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4973       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4974     }
4975     /* primal vertices (benign) */
4976     if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) {
4977       ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr);
4978       ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr);
4979       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4980       ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr);
4981       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4982       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr);
4983     }
4984     /* free memory */
4985     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4986     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4987     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4988   } else {
4989     nis = 0;
4990     nisdofs = 0;
4991     nisneu = 0;
4992     nisvert = 0;
4993     isarray = NULL;
4994   }
4995   /* destroy no longer needed map */
4996   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4997 
4998   /* restrict on coarse candidates (if needed) */
4999   coarse_mat_is = NULL;
5000   if (csin) {
5001     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
5002       if (redist) {
5003         PetscMPIInt rank;
5004         PetscInt    spc,n_spc_p1,dest[1],destsize;
5005 
5006         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
5007         spc = active_procs/ncoarse;
5008         n_spc_p1 = active_procs%ncoarse;
5009         if (im_active) {
5010           destsize = 1;
5011           if (rank > n_spc_p1*(spc+1)-1) {
5012             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
5013           } else {
5014             dest[0] = rank/(spc+1);
5015           }
5016         } else {
5017           destsize = 0;
5018         }
5019         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5020       } else if (csin_type_simple) {
5021         PetscMPIInt rank;
5022         PetscInt    issize,isidx;
5023 
5024         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
5025         if (im_active) {
5026           issize = 1;
5027           isidx = (PetscInt)rank;
5028         } else {
5029           issize = 0;
5030           isidx = -1;
5031         }
5032         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5033       } else { /* get a suitable subassembling pattern from MATIS code */
5034         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5035       }
5036 
5037       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
5038       if (!redist || ncoarse <= void_procs) {
5039         PetscInt ncoarse_cand,tissize,*nisindices;
5040         PetscInt *coarse_candidates;
5041         const PetscInt* tisindices;
5042 
5043         /* get coarse candidates' ranks in pc communicator */
5044         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
5045         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5046         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
5047           if (!coarse_candidates[i]) {
5048             coarse_candidates[ncoarse_cand++]=i;
5049           }
5050         }
5051         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
5052 
5053 
5054         if (pcbddc->dbg_flag) {
5055           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5056           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
5057           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5058           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
5059           for (i=0;i<ncoarse_cand;i++) {
5060             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
5061           }
5062           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
5063           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5064         }
5065         /* shift the pattern on coarse candidates */
5066         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
5067         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5068         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
5069         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
5070         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5071         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
5072         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
5073       }
5074       if (pcbddc->dbg_flag) {
5075         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5076         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
5077         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5078         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5079       }
5080     }
5081     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
5082     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
5083       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5084     } else { /* this is the last level, so use just receiving processes in subcomm */
5085       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5086     }
5087   } else {
5088     if (pcbddc->dbg_flag) {
5089       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5090       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
5091       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5092     }
5093     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
5094     coarse_mat_is = t_coarse_mat_is;
5095   }
5096 
5097   /* create local to global scatters for coarse problem */
5098   if (compute_vecs) {
5099     PetscInt lrows;
5100     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
5101     if (coarse_mat_is) {
5102       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
5103     } else {
5104       lrows = 0;
5105     }
5106     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
5107     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
5108     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
5109     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5110     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5111   }
5112   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
5113   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
5114 
5115   /* set defaults for coarse KSP and PC */
5116   if (multilevel_allowed) {
5117     coarse_ksp_type = KSPRICHARDSON;
5118     coarse_pc_type = PCBDDC;
5119   } else {
5120     coarse_ksp_type = KSPPREONLY;
5121     coarse_pc_type = PCREDUNDANT;
5122   }
5123 
5124   /* print some info if requested */
5125   if (pcbddc->dbg_flag) {
5126     if (!multilevel_allowed) {
5127       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5128       if (multilevel_requested) {
5129         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5130       } else if (pcbddc->max_levels) {
5131         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
5132       }
5133       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5134     }
5135   }
5136 
5137   /* create the coarse KSP object only once with defaults */
5138   if (coarse_mat_is) {
5139     MatReuse coarse_mat_reuse;
5140     PetscViewer dbg_viewer = NULL;
5141     if (pcbddc->dbg_flag) {
5142       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
5143       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5144     }
5145     if (!pcbddc->coarse_ksp) {
5146       char prefix[256],str_level[16];
5147       size_t len;
5148       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
5149       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5150       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
5151       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
5152       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
5153       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
5154       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
5155       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5156       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5157       /* prefix */
5158       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
5159       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
5160       if (!pcbddc->current_level) {
5161         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
5162         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
5163       } else {
5164         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
5165         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5166         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5167         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
5168         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
5169         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
5170       }
5171       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
5172       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5173       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
5174       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5175       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
5176       /* allow user customization */
5177       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
5178     }
5179     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5180     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5181     if (nisdofs) {
5182       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
5183       for (i=0;i<nisdofs;i++) {
5184         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
5185       }
5186     }
5187     if (nisneu) {
5188       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
5189       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
5190     }
5191     if (nisvert) {
5192       ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr);
5193       ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr);
5194     }
5195 
5196     /* get some info after set from options */
5197     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
5198     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
5199     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
5200     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
5201       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5202       isbddc = PETSC_FALSE;
5203     }
5204     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
5205     if (isredundant) {
5206       KSP inner_ksp;
5207       PC  inner_pc;
5208       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
5209       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
5210       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
5211     }
5212 
5213     /* assemble coarse matrix */
5214     if (coarse_reuse) {
5215       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
5216       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
5217       coarse_mat_reuse = MAT_REUSE_MATRIX;
5218     } else {
5219       coarse_mat_reuse = MAT_INITIAL_MATRIX;
5220     }
5221     if (isbddc || isnn) {
5222       if (pcbddc->coarsening_ratio > 1) {
5223         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
5224           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
5225           if (pcbddc->dbg_flag) {
5226             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5227             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
5228             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
5229             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5230           }
5231         }
5232         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
5233       } else {
5234         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
5235         coarse_mat = coarse_mat_is;
5236       }
5237     } else {
5238       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
5239     }
5240     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
5241 
5242     /* propagate symmetry info of coarse matrix */
5243     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
5244     if (pc->pmat->symmetric_set) {
5245       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
5246     }
5247     if (pc->pmat->hermitian_set) {
5248       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
5249     }
5250     if (pc->pmat->spd_set) {
5251       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
5252     }
5253     /* set operators */
5254     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5255     if (pcbddc->dbg_flag) {
5256       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5257     }
5258   } else { /* processes non partecipating to coarse solver (if any) */
5259     coarse_mat = 0;
5260   }
5261   ierr = PetscFree(isarray);CHKERRQ(ierr);
5262 #if 0
5263   {
5264     PetscViewer viewer;
5265     char filename[256];
5266     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
5267     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
5268     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
5269     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
5270     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
5271   }
5272 #endif
5273 
5274   /* Compute coarse null space (special handling by BDDC only) */
5275 #if 0
5276   if (pcbddc->NullSpace) {
5277     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
5278   }
5279 #endif
5280   /* hack */
5281   if (pcbddc->coarse_ksp) {
5282     Vec crhs,csol;
5283 
5284     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
5285     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
5286     if (!csol) {
5287       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
5288     }
5289     if (!crhs) {
5290       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
5291     }
5292   }
5293 
5294   /* compute null space for coarse solver if the benign trick has been requested */
5295   if (pcbddc->benign_null) {
5296 
5297     ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr);
5298     for (i=0;i<pcbddc->benign_n;i++) {
5299       ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr);
5300     }
5301     ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
5302     ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
5303     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5304     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5305     if (coarse_mat) {
5306       Vec         nullv;
5307       PetscScalar *array,*array2;
5308       PetscInt    nl;
5309 
5310       ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr);
5311       ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr);
5312       ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5313       ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr);
5314       ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr);
5315       ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr);
5316       ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5317       ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr);
5318       ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr);
5319       ierr = VecDestroy(&nullv);CHKERRQ(ierr);
5320     }
5321   }
5322 
5323   if (pcbddc->coarse_ksp) {
5324     PetscBool ispreonly;
5325 
5326     if (CoarseNullSpace) {
5327       PetscBool isnull;
5328       ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr);
5329       if (isnull) {
5330         if (isbddc && !pcbddc->benign_saddle_point) {
5331           ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
5332         } else {
5333           ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
5334         }
5335       } else {
5336         ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5337       }
5338     }
5339     /* setup coarse ksp */
5340     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
5341     /* Check coarse problem if in debug mode or if solving with an iterative method */
5342     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
5343     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
5344       KSP       check_ksp;
5345       KSPType   check_ksp_type;
5346       PC        check_pc;
5347       Vec       check_vec,coarse_vec;
5348       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
5349       PetscInt  its;
5350       PetscBool compute_eigs;
5351       PetscReal *eigs_r,*eigs_c;
5352       PetscInt  neigs;
5353       const char *prefix;
5354 
5355       /* Create ksp object suitable for estimation of extreme eigenvalues */
5356       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
5357       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5358       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5359       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
5360       if (ispreonly) {
5361         check_ksp_type = KSPPREONLY;
5362         compute_eigs = PETSC_FALSE;
5363       } else {
5364         check_ksp_type = KSPGMRES;
5365         compute_eigs = PETSC_TRUE;
5366       }
5367       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
5368       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
5369       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
5370       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
5371       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
5372       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
5373       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
5374       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
5375       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
5376       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
5377       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
5378       /* create random vec */
5379       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
5380       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
5381       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
5382       if (CoarseNullSpace) {
5383         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
5384       }
5385       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5386       /* solve coarse problem */
5387       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
5388       if (CoarseNullSpace) {
5389         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
5390       }
5391       /* set eigenvalue estimation if preonly has not been requested */
5392       if (compute_eigs) {
5393         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
5394         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
5395         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
5396         lambda_max = eigs_r[neigs-1];
5397         lambda_min = eigs_r[0];
5398         if (pcbddc->use_coarse_estimates) {
5399           if (lambda_max>lambda_min) {
5400             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
5401             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
5402           }
5403         }
5404       }
5405 
5406       /* check coarse problem residual error */
5407       if (pcbddc->dbg_flag) {
5408         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
5409         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5410         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
5411         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
5412         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5413         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
5414         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
5415         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
5416         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
5417         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
5418         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
5419         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
5420         if (CoarseNullSpace) {
5421           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr);
5422         }
5423         if (compute_eigs) {
5424           PetscReal lambda_max_s,lambda_min_s;
5425           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
5426           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
5427           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
5428           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
5429           for (i=0;i<neigs;i++) {
5430             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
5431           }
5432         }
5433         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5434         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5435       }
5436       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
5437       if (compute_eigs) {
5438         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
5439         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
5440       }
5441     }
5442   }
5443   /* print additional info */
5444   if (pcbddc->dbg_flag) {
5445     /* waits until all processes reaches this point */
5446     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
5447     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
5448     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5449   }
5450 
5451   /* free memory */
5452   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5453   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
5454   PetscFunctionReturn(0);
5455 }
5456 
5457 #undef __FUNCT__
5458 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
5459 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
5460 {
5461   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5462   PC_IS*         pcis = (PC_IS*)pc->data;
5463   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5464   IS             subset,subset_mult,subset_n;
5465   PetscInt       local_size,coarse_size=0;
5466   PetscInt       *local_primal_indices=NULL;
5467   const PetscInt *t_local_primal_indices;
5468   PetscErrorCode ierr;
5469 
5470   PetscFunctionBegin;
5471   /* Compute global number of coarse dofs */
5472   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5473     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5474   }
5475   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5476   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5477   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5478   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5479   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5480   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5481   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5482   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5483   if (local_size != pcbddc->local_primal_size) {
5484     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5485   }
5486   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5487   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5488   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5489   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5490   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5491 
5492   /* check numbering */
5493   if (pcbddc->dbg_flag) {
5494     PetscScalar coarsesum,*array,*array2;
5495     PetscInt    i;
5496     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5497 
5498     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5499     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5500     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5501     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
5502     /* counter */
5503     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5504     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5505     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5506     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5507     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5508     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5509     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5510     for (i=0;i<pcbddc->local_primal_size;i++) {
5511       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5512     }
5513     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5514     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5515     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5516     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5517     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5518     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5519     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5520     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5521     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5522     for (i=0;i<pcis->n;i++) {
5523       if (array[i] != 0.0 && array[i] != array2[i]) {
5524         PetscInt owned = (PetscInt)PetscRealPart(array[i]);
5525         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
5526         set_error = PETSC_TRUE;
5527         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5528       }
5529     }
5530     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5531     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5532     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5533     for (i=0;i<pcis->n;i++) {
5534       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5535     }
5536     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5537     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5538     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5539     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5540     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5541     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5542     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5543       PetscInt *gidxs;
5544 
5545       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5546       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5547       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5548       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5549       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5550       for (i=0;i<pcbddc->local_primal_size;i++) {
5551         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5552       }
5553       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5554       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5555     }
5556     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5557     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
5558     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5559   }
5560   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5561   /* get back data */
5562   *coarse_size_n = coarse_size;
5563   *local_primal_indices_n = local_primal_indices;
5564   PetscFunctionReturn(0);
5565 }
5566 
5567 #undef __FUNCT__
5568 #define __FUNCT__ "PCBDDCGlobalToLocal"
5569 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5570 {
5571   IS             localis_t;
5572   PetscInt       i,lsize,*idxs,n;
5573   PetscScalar    *vals;
5574   PetscErrorCode ierr;
5575 
5576   PetscFunctionBegin;
5577   /* get indices in local ordering exploiting local to global map */
5578   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5579   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5580   for (i=0;i<lsize;i++) vals[i] = 1.0;
5581   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5582   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5583   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5584   if (idxs) { /* multilevel guard */
5585     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5586   }
5587   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5588   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5589   ierr = PetscFree(vals);CHKERRQ(ierr);
5590   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5591   /* now compute set in local ordering */
5592   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5593   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5594   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5595   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5596   for (i=0,lsize=0;i<n;i++) {
5597     if (PetscRealPart(vals[i]) > 0.5) {
5598       lsize++;
5599     }
5600   }
5601   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5602   for (i=0,lsize=0;i<n;i++) {
5603     if (PetscRealPart(vals[i]) > 0.5) {
5604       idxs[lsize++] = i;
5605     }
5606   }
5607   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5608   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5609   *localis = localis_t;
5610   PetscFunctionReturn(0);
5611 }
5612 
5613 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5614 #undef __FUNCT__
5615 #define __FUNCT__ "PCBDDCMatMult_Private"
5616 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5617 {
5618   PCBDDCChange_ctx change_ctx;
5619   PetscErrorCode   ierr;
5620 
5621   PetscFunctionBegin;
5622   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5623   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5624   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5625   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5626   PetscFunctionReturn(0);
5627 }
5628 
5629 #undef __FUNCT__
5630 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5631 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5632 {
5633   PCBDDCChange_ctx change_ctx;
5634   PetscErrorCode   ierr;
5635 
5636   PetscFunctionBegin;
5637   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5638   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5639   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5640   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5641   PetscFunctionReturn(0);
5642 }
5643 
5644 #undef __FUNCT__
5645 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5646 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5647 {
5648   PC_IS               *pcis=(PC_IS*)pc->data;
5649   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5650   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5651   Mat                 S_j;
5652   PetscInt            *used_xadj,*used_adjncy;
5653   PetscBool           free_used_adj;
5654   PetscErrorCode      ierr;
5655 
5656   PetscFunctionBegin;
5657   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5658   free_used_adj = PETSC_FALSE;
5659   if (pcbddc->sub_schurs_layers == -1) {
5660     used_xadj = NULL;
5661     used_adjncy = NULL;
5662   } else {
5663     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5664       used_xadj = pcbddc->mat_graph->xadj;
5665       used_adjncy = pcbddc->mat_graph->adjncy;
5666     } else if (pcbddc->computed_rowadj) {
5667       used_xadj = pcbddc->mat_graph->xadj;
5668       used_adjncy = pcbddc->mat_graph->adjncy;
5669     } else {
5670       PetscBool      flg_row=PETSC_FALSE;
5671       const PetscInt *xadj,*adjncy;
5672       PetscInt       nvtxs;
5673 
5674       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5675       if (flg_row) {
5676         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5677         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5678         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5679         free_used_adj = PETSC_TRUE;
5680       } else {
5681         pcbddc->sub_schurs_layers = -1;
5682         used_xadj = NULL;
5683         used_adjncy = NULL;
5684       }
5685       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5686     }
5687   }
5688 
5689   /* setup sub_schurs data */
5690   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5691   if (!sub_schurs->use_mumps) {
5692     /* pcbddc->ksp_D up to date only if not using MUMPS */
5693     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5694     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL);CHKERRQ(ierr);
5695   } else {
5696     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5697     PetscBool isseqaij;
5698     PetscInt  i;
5699     if (!pcbddc->use_vertices && reuse_solvers) {
5700       PetscInt n_vertices;
5701 
5702       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5703       reuse_solvers = (PetscBool)!n_vertices;
5704     }
5705     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5706     if (!isseqaij) {
5707       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5708       if (matis->A == pcbddc->local_mat) {
5709         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5710         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5711       } else {
5712         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5713       }
5714     }
5715     if (!pcbddc->benign_n) {
5716       ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,0,NULL,NULL);CHKERRQ(ierr);
5717     } else {
5718       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5719       ierr = PCBDDCSubSchursSetUp(sub_schurs,matis->A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs);CHKERRQ(ierr);
5720     }
5721     /* we no longer need these index sets */
5722     for (i=0;i<pcbddc->benign_n;i++) {
5723       ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr);
5724     }
5725     ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr);
5726   }
5727   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5728 
5729   /* free adjacency */
5730   if (free_used_adj) {
5731     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5732   }
5733   PetscFunctionReturn(0);
5734 }
5735 
5736 #undef __FUNCT__
5737 #define __FUNCT__ "PCBDDCInitSubSchurs"
5738 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5739 {
5740   PC_IS               *pcis=(PC_IS*)pc->data;
5741   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5742   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5743   PCBDDCGraph         graph;
5744   PetscErrorCode      ierr;
5745 
5746   PetscFunctionBegin;
5747   /* attach interface graph for determining subsets */
5748   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5749     IS       verticesIS,verticescomm;
5750     PetscInt vsize,*idxs;
5751 
5752     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5753     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5754     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5755     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5756     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5757     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5758     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5759     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5760     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5761     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5762     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5763 /*
5764     if (pcbddc->dbg_flag) {
5765       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5766     }
5767 */
5768   } else {
5769     graph = pcbddc->mat_graph;
5770   }
5771 
5772   /* sub_schurs init */
5773   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5774 
5775   /* free graph struct */
5776   if (pcbddc->sub_schurs_rebuild) {
5777     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5778   }
5779   PetscFunctionReturn(0);
5780 }
5781 
5782 #undef __FUNCT__
5783 #define __FUNCT__ "PCBDDCCheckOperator"
5784 PetscErrorCode PCBDDCCheckOperator(PC pc)
5785 {
5786   PC_IS               *pcis=(PC_IS*)pc->data;
5787   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5788   PetscErrorCode      ierr;
5789 
5790   PetscFunctionBegin;
5791   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5792     IS             zerodiag = NULL;
5793     Mat            S_j,B0_B=NULL;
5794     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5795     PetscScalar    *p0_check,*array,*array2;
5796     PetscReal      norm;
5797     PetscInt       i;
5798 
5799     /* B0 and B0_B */
5800     if (zerodiag) {
5801       IS       dummy;
5802 
5803       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
5804       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5805       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5806       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5807     }
5808     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5809     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5810     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5811     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5812     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5813     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5814     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5815     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5816     /* S_j */
5817     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5818     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5819 
5820     /* mimic vector in \widetilde{W}_\Gamma */
5821     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5822     /* continuous in primal space */
5823     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5824     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5825     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5826     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5827     ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr);
5828     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
5829     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5830     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5831     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5832     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5833     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5834     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5835     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5836     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5837 
5838     /* assemble rhs for coarse problem */
5839     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5840     /* local with Schur */
5841     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5842     if (zerodiag) {
5843       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5844       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
5845       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5846       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5847     }
5848     /* sum on primal nodes the local contributions */
5849     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5850     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5851     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5852     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5853     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5854     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5855     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5856     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5857     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5858     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5859     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5860     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5861     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5862     /* scale primal nodes (BDDC sums contibutions) */
5863     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5864     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5865     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5866     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5867     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5868     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5869     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5870     /* global: \widetilde{B0}_B w_\Gamma */
5871     if (zerodiag) {
5872       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5873       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5874       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
5875       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5876     }
5877     /* BDDC */
5878     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5879     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5880 
5881     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5882     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5883     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5884     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5885     for (i=0;i<pcbddc->benign_n;i++) {
5886       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
5887     }
5888     ierr = PetscFree(p0_check);CHKERRQ(ierr);
5889     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5890     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5891     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5892     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5893     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5894   }
5895   PetscFunctionReturn(0);
5896 }
5897