xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 6dba178d53d088f8af27c39b86adaa0496f0d2e2)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/ksp/pc/impls/bddc/bddc.h>
3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
8 
9 /* TODO: add reuse flag */
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqAIJCompress"
12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
13 {
14   Mat            Bt;
15   PetscScalar    *a,*bdata;
16   const PetscInt *ii,*ij;
17   PetscInt       m,n,i,nnz,*bii,*bij;
18   PetscBool      flg_row;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr);
23   ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
24   ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr);
25   nnz = n;
26   for (i=0;i<ii[n];i++) {
27     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
28   }
29   ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr);
30   ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr);
31   ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr);
32   nnz = 0;
33   bii[0] = 0;
34   for (i=0;i<n;i++) {
35     PetscInt j;
36     for (j=ii[i];j<ii[i+1];j++) {
37       PetscScalar entry = a[j];
38       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) {
39         bij[nnz] = ij[j];
40         bdata[nnz] = entry;
41         nnz++;
42       }
43     }
44     bii[i+1] = nnz;
45   }
46   ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr);
47   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr);
48   ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
49   {
50     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
51     b->free_a = PETSC_TRUE;
52     b->free_ij = PETSC_TRUE;
53   }
54   *B = Bt;
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDetectDisconnectedComponents"
60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[])
61 {
62   Mat                    B;
63   IS                     is_dummy,*cc_n;
64   ISLocalToGlobalMapping l2gmap_dummy;
65   PCBDDCGraph            graph;
66   PetscInt               i,n;
67   PetscInt               *xadj,*adjncy;
68   PetscInt               *xadj_filtered,*adjncy_filtered;
69   PetscBool              flg_row,isseqaij;
70   PetscErrorCode         ierr;
71 
72   PetscFunctionBegin;
73   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
74   if (!isseqaij && filter) {
75     PetscBool isseqdense;
76 
77     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr);
78     if (!isseqdense) {
79       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
80     } else { /* TODO: rectangular case and LDA */
81       PetscScalar *array;
82       PetscReal   chop=1.e-6;
83 
84       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
85       ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr);
86       ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr);
87       for (i=0;i<n;i++) {
88         PetscInt j;
89         for (j=i+1;j<n;j++) {
90           PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
91           if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
92           if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
93         }
94       }
95       ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr);
96       ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
97     }
98   } else {
99     B = A;
100   }
101   ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
102 
103   /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
104   if (filter) {
105     PetscScalar *data;
106     PetscInt    j,cum;
107 
108     ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr);
109     ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr);
110     cum = 0;
111     for (i=0;i<n;i++) {
112       PetscInt t;
113 
114       for (j=xadj[i];j<xadj[i+1];j++) {
115         if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
116           continue;
117         }
118         adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
119       }
120       t = xadj_filtered[i];
121       xadj_filtered[i] = cum;
122       cum += t;
123     }
124     ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr);
125   } else {
126     xadj_filtered = NULL;
127     adjncy_filtered = NULL;
128   }
129 
130   /* compute local connected components using PCBDDCGraph */
131   ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr);
132   ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
133   ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
134   ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
135   ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr);
136   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
137   if (xadj_filtered) {
138     graph->xadj = xadj_filtered;
139     graph->adjncy = adjncy_filtered;
140   } else {
141     graph->xadj = xadj;
142     graph->adjncy = adjncy;
143   }
144   ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
145   ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
146   /* partial clean up */
147   ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr);
148   ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
149   if (A != B) {
150     ierr = MatDestroy(&B);CHKERRQ(ierr);
151   }
152 
153   /* get back data */
154   if (ncc) *ncc = graph->ncc;
155   if (cc) {
156     ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr);
157     for (i=0;i<graph->ncc;i++) {
158       ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr);
159     }
160     *cc = cc_n;
161   }
162   /* clean up graph */
163   graph->xadj = 0;
164   graph->adjncy = 0;
165   ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "PCBDDCBenignCheck"
171 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
172 {
173   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
174   PC_IS*         pcis = (PC_IS*)(pc->data);
175   IS             dirIS = NULL;
176   PetscInt       i;
177   PetscErrorCode ierr;
178 
179   PetscFunctionBegin;
180   ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
181   if (zerodiag) {
182     Mat            A;
183     Vec            vec3_N;
184     PetscScalar    *vals;
185     const PetscInt *idxs;
186     PetscInt       nz;
187 
188     /* p0 */
189     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
190     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
191     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
192     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
193     for (i=0;i<nz;i++) vals[i] = 1.;
194     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
195     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
196     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
197     /* v_I */
198     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
199     for (i=0;i<nz;i++) vals[i] = 0.;
200     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
201     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
202     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
203     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
204     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
205     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
206     if (dirIS) {
207       PetscInt n;
208 
209       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
210       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
211       for (i=0;i<n;i++) vals[i] = 0.;
212       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
213       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
214     }
215     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
216     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
217     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
218     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
219     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
220     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
221     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
222     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) {
223       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
224     }
225     ierr = PetscFree(vals);CHKERRQ(ierr);
226     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
227   }
228   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
229 
230   /* check PCBDDCBenignGetOrSetP0 */
231   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
232   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
233   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
234   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
235   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
236   for (i=0;i<pcbddc->benign_n;i++) {
237     if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) {
238       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr);
239     }
240   }
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
246 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
247 {
248   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
249   IS             pressures,zerodiag,*zerodiag_subs;
250   PetscInt       nz,n;
251   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag;
252   PetscErrorCode ierr;
253 
254   PetscFunctionBegin;
255   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
256   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
257   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
258   /* if a local info on dofs is present, assumes the last field is represented by "pressures"
259      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
260      Checks if all the pressure dofs in each subdomain have a zero diagonal
261      If not, a change of basis on pressures is not needed
262      since the local Schur complements are SPD
263   */
264   has_null_pressures = PETSC_TRUE;
265   have_null = PETSC_TRUE;
266   if (pcbddc->n_ISForDofsLocal) {
267     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
268 
269     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
270     ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr);
271     ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
272     ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr);
273     ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
274     ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr);
275     if (!sorted) {
276       ierr = ISSort(pressures);CHKERRQ(ierr);
277     }
278   } else {
279     pressures = NULL;
280   }
281   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
282   ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr);
283   /* TODO: add check for shared dofs and raise error */
284   ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr);
285   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
286   if (!sorted) {
287     ierr = ISSort(zerodiag);CHKERRQ(ierr);
288   }
289   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
290   if (!nz) {
291     if (n) have_null = PETSC_FALSE;
292     has_null_pressures = PETSC_FALSE;
293     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
294   }
295   recompute_zerodiag = PETSC_FALSE;
296   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
297   zerodiag_subs = NULL;
298   pcbddc->benign_n = 0;
299   if (has_null_pressures) {
300     IS       *subs;
301     PetscInt nsubs,i;
302 
303     subs = pcbddc->local_subs;
304     nsubs = pcbddc->n_local_subs;
305     if (nsubs > 1) {
306       ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr);
307       for (i=0;i<nsubs;i++) {
308         ISLocalToGlobalMapping l2g;
309         IS                     t_zerodiag_subs;
310         PetscInt               nl;
311 
312         ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr);
313         ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr);
314         ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr);
315         if (nl) {
316           PetscBool valid = PETSC_TRUE;
317 
318           if (pressures) {
319             IS t_pressure_subs;
320             ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr);
321             ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr);
322             ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr);
323           }
324           if (valid) {
325             ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr);
326             pcbddc->benign_n++;
327           } else {
328             recompute_zerodiag = PETSC_TRUE;
329           }
330         }
331         ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr);
332         ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr);
333       }
334     } else { /* there's just one subdomain (or zero if they have not been detected */
335       PetscBool valid = PETSC_TRUE;
336       if (pressures) {
337         ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr);
338       }
339       if (valid) {
340         pcbddc->benign_n = 1;
341         ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr);
342         ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr);
343         zerodiag_subs[0] = zerodiag;
344       }
345     }
346   }
347 
348   if (!pcbddc->benign_n) {
349     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
350     recompute_zerodiag = PETSC_FALSE;
351     has_null_pressures = PETSC_FALSE;
352     have_null = PETSC_FALSE;
353   }
354 
355   /* final check for null pressures */
356   if (zerodiag && pressures) {
357     PetscInt nz,np;
358     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
359     ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr);
360     if (nz != np) have_null = PETSC_FALSE;
361   }
362 
363   if (recompute_zerodiag) {
364     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
365     if (pcbddc->benign_n == 1) {
366       ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr);
367       zerodiag = zerodiag_subs[0];
368     } else {
369       PetscInt i,nzn,*new_idxs;
370 
371       nzn = 0;
372       for (i=0;i<pcbddc->benign_n;i++) {
373         PetscInt ns;
374         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
375         nzn += ns;
376       }
377       ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr);
378       nzn = 0;
379       for (i=0;i<pcbddc->benign_n;i++) {
380         PetscInt ns,*idxs;
381         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
382         ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
383         ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr);
384         ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
385         nzn += ns;
386       }
387       ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr);
388       ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr);
389     }
390     have_null = PETSC_FALSE;
391   }
392 
393   if (has_null_pressures) {
394     IS             zerodiagc;
395     const PetscInt *idxs,*idxsc;
396     PetscInt       i,s,*nnz;
397 
398     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
399     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
400     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
401     /* local change of basis for pressures */
402     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
403     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr);
404     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
405     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
406     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
407     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
408     for (i=0;i<pcbddc->benign_n;i++) {
409       PetscInt nzs,j;
410 
411       ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr);
412       ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
413       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
414       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
415       ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
416     }
417     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
418     ierr = PetscFree(nnz);CHKERRQ(ierr);
419     /* set identity on velocities */
420     for (i=0;i<n-nz;i++) {
421       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
422     }
423     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
424     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
425     ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
426     ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr);
427     /* set change on pressures */
428     for (s=0;s<pcbddc->benign_n;s++) {
429       PetscScalar *array;
430       PetscInt    nzs;
431 
432       ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr);
433       ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
434       for (i=0;i<nzs-1;i++) {
435         PetscScalar vals[2];
436         PetscInt    cols[2];
437 
438         cols[0] = idxs[i];
439         cols[1] = idxs[nzs-1];
440         vals[0] = 1.;
441         vals[1] = 1.;
442         ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
443       }
444       ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr);
445       for (i=0;i<nzs-1;i++) array[i] = -1.;
446       array[nzs-1] = 1.;
447       ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
448       /* store local idxs for p0 */
449       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
450       ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
451       ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr);
452       ierr = PetscFree(array);CHKERRQ(ierr);
453     }
454     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
455     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
456     /* TODO: need optimization? */
457     ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->benign_original_mat);CHKERRQ(ierr);
458     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
459     ierr = MatSeqAIJCompress(pcbddc->benign_original_mat,&pcbddc->local_mat);CHKERRQ(ierr);
460     ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
461     /* store global idxs for p0 */
462     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
463   }
464   ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr);
465   ierr = ISDestroy(&pressures);CHKERRQ(ierr);
466 
467   /* determines if the coarse solver will be singular or not */
468   ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
469   *zerodiaglocal = zerodiag;
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
475 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
476 {
477   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
478   PetscErrorCode ierr;
479 
480   PetscFunctionBegin;
481   if (!pcbddc->benign_sf) {
482     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
483     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
484   }
485   if (get) { /* use SF to get values */
486     PetscScalar *array;
487 
488     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
489     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
490     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
491     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
492   } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */
493     ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
494     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
495     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
496   }
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
502 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
503 {
504   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
505   PetscErrorCode ierr;
506 
507   PetscFunctionBegin;
508   /* TODO: add error checking
509     - avoid nested pop (or push) calls.
510     - cannot push before pop.
511     - cannot call this if pcbddc->local_mat is NULL
512   */
513   if (!pcbddc->benign_n) {
514     PetscFunctionReturn(0);
515   }
516   if (pop) {
517     IS       is_p0;
518     MatReuse reuse;
519 
520     /* extract B_0 */
521     reuse = MAT_INITIAL_MATRIX;
522     if (pcbddc->benign_B0) {
523       reuse = MAT_REUSE_MATRIX;
524     }
525     ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr);
526     ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr);
527     /* remove rows and cols from local problem */
528     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
529     ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
530     ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr);
531     ierr = ISDestroy(&is_p0);CHKERRQ(ierr);
532   } else { /* push */
533     PetscInt i;
534 
535     for (i=0;i<pcbddc->benign_n;i++) {
536       PetscScalar *B0_vals;
537       PetscInt    *B0_cols,B0_ncol;
538 
539       ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
540       ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
541       ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
542       ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr);
543       ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
544     }
545     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547   }
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "PCBDDCAdaptiveSelection"
553 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
554 {
555   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
556   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
557   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
558   PetscBLASInt    *B_iwork,*B_ifail;
559   PetscScalar     *work,lwork;
560   PetscScalar     *St,*S,*eigv;
561   PetscScalar     *Sarray,*Starray;
562   PetscReal       *eigs,thresh;
563   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
564   PetscBool       allocated_S_St;
565 #if defined(PETSC_USE_COMPLEX)
566   PetscReal       *rwork;
567 #endif
568   PetscErrorCode  ierr;
569 
570   PetscFunctionBegin;
571   if (!sub_schurs->use_mumps) {
572     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
573   }
574 
575   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
576     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
577   }
578 
579   if (pcbddc->dbg_flag) {
580     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
581     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
582     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
583     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
584   }
585 
586   if (pcbddc->dbg_flag) {
587     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
588   }
589 
590   /* max size of subsets */
591   mss = 0;
592   for (i=0;i<sub_schurs->n_subs;i++) {
593     PetscInt subset_size;
594 
595     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
596     mss = PetscMax(mss,subset_size);
597   }
598 
599   /* min/max and threshold */
600   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
601   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
602   nmax = PetscMax(nmin,nmax);
603   allocated_S_St = PETSC_FALSE;
604   if (nmin) {
605     allocated_S_St = PETSC_TRUE;
606   }
607 
608   /* allocate lapack workspace */
609   cum = cum2 = 0;
610   maxneigs = 0;
611   for (i=0;i<sub_schurs->n_subs;i++) {
612     PetscInt n,subset_size;
613 
614     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
615     n = PetscMin(subset_size,nmax);
616     cum += subset_size;
617     cum2 += subset_size*n;
618     maxneigs = PetscMax(maxneigs,n);
619   }
620   if (mss) {
621     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
622       PetscBLASInt B_itype = 1;
623       PetscBLASInt B_N = mss;
624       PetscReal    zero = 0.0;
625       PetscReal    eps = 0.0; /* dlamch? */
626 
627       B_lwork = -1;
628       S = NULL;
629       St = NULL;
630       eigs = NULL;
631       eigv = NULL;
632       B_iwork = NULL;
633       B_ifail = NULL;
634 #if defined(PETSC_USE_COMPLEX)
635       rwork = NULL;
636 #endif
637       thresh = 1.0;
638       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
639 #if defined(PETSC_USE_COMPLEX)
640       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
641 #else
642       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
643 #endif
644       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
645       ierr = PetscFPTrapPop();CHKERRQ(ierr);
646     } else {
647         /* TODO */
648     }
649   } else {
650     lwork = 0;
651   }
652 
653   nv = 0;
654   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
655     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
656   }
657   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
658   if (allocated_S_St) {
659     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
660   }
661   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
662 #if defined(PETSC_USE_COMPLEX)
663   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
664 #endif
665   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
666                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
667                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
668                       nv+cum,&pcbddc->adaptive_constraints_idxs,
669                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
670   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
671 
672   maxneigs = 0;
673   cum = cum2 = cumarray = 0;
674   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
675   pcbddc->adaptive_constraints_data_ptr[0] = 0;
676   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
677     const PetscInt *idxs;
678 
679     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
680     for (cum=0;cum<nv;cum++) {
681       pcbddc->adaptive_constraints_n[cum] = 1;
682       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
683       pcbddc->adaptive_constraints_data[cum] = 1.0;
684       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
685       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
686     }
687     cum2 = cum;
688     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
689   }
690 
691   if (mss) { /* multilevel */
692     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
693     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
694   }
695 
696   thresh = pcbddc->adaptive_threshold;
697   for (i=0;i<sub_schurs->n_subs;i++) {
698     const PetscInt *idxs;
699     PetscReal      infty = PETSC_MAX_REAL;
700     PetscInt       j,subset_size,eigs_start = 0;
701     PetscBLASInt   B_N;
702     PetscBool      same_data = PETSC_FALSE;
703 
704     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
705     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
706     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
707     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
708       if (sub_schurs->is_hermitian) {
709         PetscInt j,k;
710         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
711           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
712           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
713         }
714         for (j=0;j<subset_size;j++) {
715           for (k=j;k<subset_size;k++) {
716             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
717             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
718           }
719         }
720       } else {
721         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
722         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
723       }
724     } else {
725       S = Sarray + cumarray;
726       St = Starray + cumarray;
727     }
728 
729     /* see if we can save some work */
730     if (sub_schurs->n_subs == 1) {
731       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
732     }
733 
734     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
735       B_neigs = 0;
736     } else {
737       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
738         PetscBLASInt B_itype = 1;
739         PetscBLASInt B_IL, B_IU;
740         PetscReal    eps = -1.0; /* dlamch? */
741         PetscInt     nmin_s;
742 
743         if (pcbddc->dbg_flag) {
744           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
745         }
746 
747         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
748         if (thresh > 1.+PETSC_SMALL) {
749 
750           /* ask for eigenvalues larger than thresh */
751 #if defined(PETSC_USE_COMPLEX)
752           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
753 #else
754           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
755 #endif
756         } else {
757           B_IU = PetscMax(1,PetscMin(B_N,nmax));
758           B_IL = 1;
759 #if defined(PETSC_USE_COMPLEX)
760           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
761 #else
762           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
763 #endif
764         }
765         ierr = PetscFPTrapPop();CHKERRQ(ierr);
766         if (B_ierr) {
767           if (B_ierr < 0 ) {
768             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
769           } else if (B_ierr <= B_N) {
770             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
771           } else {
772             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
773           }
774         }
775 
776         if (B_neigs > nmax) {
777           if (pcbddc->dbg_flag) {
778             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
779           }
780           eigs_start = B_neigs -nmax;
781           B_neigs = nmax;
782         }
783 
784         nmin_s = PetscMin(nmin,B_N);
785         if (B_neigs < nmin_s) {
786           PetscBLASInt B_neigs2;
787 
788           B_IU = B_N - B_neigs;
789           B_IL = B_N - nmin_s + 1;
790           if (pcbddc->dbg_flag) {
791             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
792           }
793           if (sub_schurs->is_hermitian) {
794             PetscInt j;
795             for (j=0;j<subset_size;j++) {
796               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
797             }
798             for (j=0;j<subset_size;j++) {
799               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
800             }
801           } else {
802             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
803             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
804           }
805           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
806 #if defined(PETSC_USE_COMPLEX)
807           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
808 #else
809           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
810 #endif
811           ierr = PetscFPTrapPop();CHKERRQ(ierr);
812           B_neigs += B_neigs2;
813         }
814         if (B_ierr) {
815           if (B_ierr < 0 ) {
816             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
817           } else if (B_ierr <= B_N) {
818             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
819           } else {
820             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
821           }
822         }
823         if (pcbddc->dbg_flag) {
824           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
825           for (j=0;j<B_neigs;j++) {
826             if (eigs[j] == 0.0) {
827               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
828             } else {
829               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
830             }
831           }
832         }
833       } else {
834           /* TODO */
835       }
836     }
837     maxneigs = PetscMax(B_neigs,maxneigs);
838     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
839     if (B_neigs) {
840       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
841 
842       if (pcbddc->dbg_flag > 1) {
843         PetscInt ii;
844         for (ii=0;ii<B_neigs;ii++) {
845           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
846           for (j=0;j<B_N;j++) {
847 #if defined(PETSC_USE_COMPLEX)
848             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
849             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
850             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
851 #else
852             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
853 #endif
854           }
855         }
856       }
857 #if 0
858       for (j=0;j<B_neigs;j++) {
859         PetscBLASInt Blas_N,Blas_one = 1.0;
860         PetscScalar norm;
861         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
862         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
863                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
864         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
865           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
866         } else {
867           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
868         }
869         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
870       }
871 #endif
872       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
873       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
874       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
875       cum++;
876     }
877     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
878     /* shift for next computation */
879     cumarray += subset_size*subset_size;
880   }
881   if (pcbddc->dbg_flag) {
882     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
883   }
884 
885   if (mss) {
886     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
887     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
888     /* destroy matrices (junk) */
889     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
890     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
891   }
892   if (allocated_S_St) {
893     ierr = PetscFree2(S,St);CHKERRQ(ierr);
894   }
895   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
896 #if defined(PETSC_USE_COMPLEX)
897   ierr = PetscFree(rwork);CHKERRQ(ierr);
898 #endif
899   if (pcbddc->dbg_flag) {
900     PetscInt maxneigs_r;
901     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
902     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
903   }
904   PetscFunctionReturn(0);
905 }
906 
907 #undef __FUNCT__
908 #define __FUNCT__ "PCBDDCSetUpSolvers"
909 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
910 {
911   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
912   PetscScalar    *coarse_submat_vals;
913   PetscErrorCode ierr;
914 
915   PetscFunctionBegin;
916   /* Setup local scatters R_to_B and (optionally) R_to_D */
917   /* PCBDDCSetUpLocalWorkVectors should be called first! */
918   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
919 
920   /* Setup local neumann solver ksp_R */
921   /* PCBDDCSetUpLocalScatters should be called first! */
922   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
923 
924   /* Change global null space passed in by the user if change of basis has been requested */
925   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
926     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
927   }
928 
929   /*
930      Setup local correction and local part of coarse basis.
931      Gives back the dense local part of the coarse matrix in column major ordering
932   */
933   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
934 
935   /* Compute total number of coarse nodes and setup coarse solver */
936   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
937 
938   /* free */
939   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 #undef __FUNCT__
944 #define __FUNCT__ "PCBDDCResetCustomization"
945 PetscErrorCode PCBDDCResetCustomization(PC pc)
946 {
947   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
948   PetscErrorCode ierr;
949 
950   PetscFunctionBegin;
951   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
952   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
953   ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
954   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
955   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
956   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
957   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
958   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
959   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
960   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
961   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
962   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "PCBDDCResetTopography"
968 PetscErrorCode PCBDDCResetTopography(PC pc)
969 {
970   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
971   PetscInt       i;
972   PetscErrorCode ierr;
973 
974   PetscFunctionBegin;
975   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
976   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
977   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
978   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
979   for (i=0;i<pcbddc->n_local_subs;i++) {
980     ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr);
981   }
982   ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr);
983   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "PCBDDCResetSolvers"
989 PetscErrorCode PCBDDCResetSolvers(PC pc)
990 {
991   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
992   PetscScalar    *array;
993   PetscErrorCode ierr;
994 
995   PetscFunctionBegin;
996   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
997   if (pcbddc->coarse_phi_B) {
998     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
999     ierr = PetscFree(array);CHKERRQ(ierr);
1000   }
1001   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1002   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1003   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1004   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1005   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1006   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1007   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1008   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1009   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1010   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1011   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1012   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1013   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1014   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
1015   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
1016   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
1017   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
1018   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1019   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1020   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1021   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
1022   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
1023   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
1024   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
1025   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
1026   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
1027   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
1028   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
1029   ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
1030   PetscFunctionReturn(0);
1031 }
1032 
1033 #undef __FUNCT__
1034 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
1035 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
1036 {
1037   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1038   PC_IS          *pcis = (PC_IS*)pc->data;
1039   VecType        impVecType;
1040   PetscInt       n_constraints,n_R,old_size;
1041   PetscErrorCode ierr;
1042 
1043   PetscFunctionBegin;
1044   if (!pcbddc->ConstraintMatrix) {
1045     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
1046   }
1047   /* get sizes */
1048   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
1049   n_R = pcis->n - pcbddc->n_vertices;
1050   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
1051   /* local work vectors (try to avoid unneeded work)*/
1052   /* R nodes */
1053   old_size = -1;
1054   if (pcbddc->vec1_R) {
1055     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
1056   }
1057   if (n_R != old_size) {
1058     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1059     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1060     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
1061     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
1062     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
1063     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
1064   }
1065   /* local primal dofs */
1066   old_size = -1;
1067   if (pcbddc->vec1_P) {
1068     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
1069   }
1070   if (pcbddc->local_primal_size != old_size) {
1071     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1072     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
1073     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
1074     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
1075   }
1076   /* local explicit constraints */
1077   old_size = -1;
1078   if (pcbddc->vec1_C) {
1079     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
1080   }
1081   if (n_constraints && n_constraints != old_size) {
1082     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1083     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
1084     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
1085     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "PCBDDCSetUpCorrection"
1092 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
1093 {
1094   PetscErrorCode  ierr;
1095   /* pointers to pcis and pcbddc */
1096   PC_IS*          pcis = (PC_IS*)pc->data;
1097   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1098   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1099   /* submatrices of local problem */
1100   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
1101   /* submatrices of benign trick */
1102   Mat             B0_V = NULL;
1103   /* submatrices of local coarse problem */
1104   Mat             S_VV,S_CV,S_VC,S_CC;
1105   /* working matrices */
1106   Mat             C_CR;
1107   /* additional working stuff */
1108   PC              pc_R;
1109   Mat             F;
1110   PetscBool       isLU,isCHOL,isILU;
1111 
1112   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
1113   PetscScalar     *work;
1114   PetscInt        *idx_V_B;
1115   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
1116   PetscInt        i,n_R,n_D,n_B;
1117 
1118   /* some shortcuts to scalars */
1119   PetscScalar     one=1.0,m_one=-1.0;
1120 
1121   PetscFunctionBegin;
1122   if (!pcbddc->symmetric_primal && pcbddc->benign_n) {
1123     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
1124   }
1125 
1126   /* Set Non-overlapping dimensions */
1127   n_vertices = pcbddc->n_vertices;
1128   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
1129   n_B = pcis->n_B;
1130   n_D = pcis->n - n_B;
1131   n_R = pcis->n - n_vertices;
1132 
1133   /* vertices in boundary numbering */
1134   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
1135   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
1136   if (i != n_vertices) {
1137     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
1138   }
1139 
1140   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
1141   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
1142   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
1143   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
1144   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
1145   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
1146   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
1147   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
1148   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
1149   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
1150 
1151   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
1152   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
1153   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
1154   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
1155   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
1156   lda_rhs = n_R;
1157   if (isLU || isILU || isCHOL) {
1158     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
1159   } else if (sub_schurs->reuse_mumps) {
1160     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1161     MatFactorType type;
1162 
1163     F = reuse_mumps->F;
1164     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
1165     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
1166     ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr);
1167   } else {
1168     F = NULL;
1169   }
1170 
1171   /* allocate workspace */
1172   n = 0;
1173   if (n_constraints) {
1174     n += lda_rhs*n_constraints;
1175   }
1176   if (n_vertices) {
1177     n = PetscMax(2*lda_rhs*n_vertices,n);
1178     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
1179   }
1180   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1181 
1182   /* Precompute stuffs needed for preprocessing and application of BDDC*/
1183   if (n_constraints) {
1184     Mat         M1,M2,M3;
1185     Mat         auxmat;
1186     IS          is_aux;
1187     PetscScalar *array,*array2;
1188 
1189     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1190     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1191 
1192     /* Extract constraints on R nodes: C_{CR}  */
1193     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
1194     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
1195     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
1196 
1197     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
1198     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
1199     ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1200     for (i=0;i<n_constraints;i++) {
1201       const PetscScalar *row_cmat_values;
1202       const PetscInt    *row_cmat_indices;
1203       PetscInt          size_of_constraint,j;
1204 
1205       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1206       for (j=0;j<size_of_constraint;j++) {
1207         work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
1208       }
1209       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1210     }
1211     ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
1212     if (F) {
1213       Mat B;
1214 
1215       ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr);
1216       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
1217       ierr = MatDestroy(&B);CHKERRQ(ierr);
1218     } else {
1219       PetscScalar *marr;
1220 
1221       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1222       for (i=0;i<n_constraints;i++) {
1223         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr);
1224         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr);
1225         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1226         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1227         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1228       }
1229       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1230     }
1231     if (!pcbddc->switch_static) {
1232       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1233       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1234       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1235       for (i=0;i<n_constraints;i++) {
1236         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr);
1237         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
1238         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1241         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1242       }
1243       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1244       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1245       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1246     } else {
1247       if (lda_rhs != n_R) {
1248         IS dummy;
1249 
1250         ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr);
1251         ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1252         ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1253       } else {
1254         ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
1255         pcbddc->local_auxmat2 = local_auxmat2_R;
1256       }
1257       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1258     }
1259     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1260     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
1261     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
1262     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
1263     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
1264     if (isCHOL) {
1265       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
1266     } else {
1267       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
1268     }
1269     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
1270     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
1271     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
1272     ierr = MatDestroy(&M2);CHKERRQ(ierr);
1273     ierr = MatDestroy(&M3);CHKERRQ(ierr);
1274     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
1275     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
1276     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
1277     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
1278     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1279   }
1280   /* Get submatrices from subdomain matrix */
1281   if (pcbddc->benign_n) {
1282     IS        dummy;
1283     Mat       B0_R;
1284     PetscReal norm;
1285 
1286     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1287     ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
1288     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
1289     if (norm > PETSC_SMALL) {
1290       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
1291     }
1292     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1293     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1294   }
1295 
1296   if (n_vertices) {
1297     IS is_aux;
1298 
1299     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
1300       IS tis;
1301 
1302       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
1303       ierr = ISSort(tis);CHKERRQ(ierr);
1304       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
1305       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1306     } else {
1307       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
1308     }
1309     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
1310     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
1311     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
1312     if (pcbddc->benign_n) {
1313       IS dummy;
1314 
1315       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1316       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1317       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1318     }
1319     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1320   }
1321 
1322   /* Matrix of coarse basis functions (local) */
1323   if (pcbddc->coarse_phi_B) {
1324     PetscInt on_B,on_primal,on_D=n_D;
1325     if (pcbddc->coarse_phi_D) {
1326       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1327     }
1328     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1329     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1330       PetscScalar *marray;
1331 
1332       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1333       ierr = PetscFree(marray);CHKERRQ(ierr);
1334       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1335       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1336       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1337       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1338     }
1339   }
1340 
1341   if (!pcbddc->coarse_phi_B) {
1342     PetscScalar *marray;
1343 
1344     n = n_B*pcbddc->local_primal_size;
1345     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1346       n += n_D*pcbddc->local_primal_size;
1347     }
1348     if (!pcbddc->symmetric_primal) {
1349       n *= 2;
1350     }
1351     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1352     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1353     n = n_B*pcbddc->local_primal_size;
1354     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1355       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1356       n += n_D*pcbddc->local_primal_size;
1357     }
1358     if (!pcbddc->symmetric_primal) {
1359       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1360       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1361         n = n_B*pcbddc->local_primal_size;
1362         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1363       }
1364     } else {
1365       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1366       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1367       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1368         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1369         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1370       }
1371     }
1372   }
1373 
1374   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1375   p0_lidx_I = NULL;
1376   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1377     const PetscInt *idxs;
1378 
1379     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1380     ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr);
1381     for (i=0;i<pcbddc->benign_n;i++) {
1382       ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr);
1383     }
1384     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1385   }
1386 
1387   /* vertices */
1388   if (n_vertices) {
1389 
1390     ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1391 
1392     if (n_R) {
1393       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1394       PetscBLASInt B_N,B_one = 1;
1395       PetscScalar  *x,*y;
1396       PetscBool    isseqaij;
1397 
1398       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1399       if (lda_rhs == n_R) {
1400         ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1401       } else {
1402         Mat         A_RVt;
1403         PetscScalar *array;
1404 
1405         /* TODO optimize */
1406         ierr = MatConvert(A_RV,MATDENSE,MAT_INITIAL_MATRIX,&A_RVt);CHKERRQ(ierr);
1407         ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1408         ierr = PetscMemzero(work+lda_rhs*n_vertices,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1409         ierr = MatDenseGetArray(A_RVt,&array);CHKERRQ(ierr);
1410         for (i=0;i<n_vertices;i++) {
1411           ierr = PetscMemcpy(work+(n_vertices+i)*lda_rhs,array+i*n_R,n_R*sizeof(PetscScalar));CHKERRQ(ierr);
1412         }
1413         ierr = MatDenseRestoreArray(A_RVt,&array);CHKERRQ(ierr);
1414         ierr = MatDestroy(&A_RVt);CHKERRQ(ierr);
1415         ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&A_RV);CHKERRQ(ierr);
1416       }
1417       ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1418       if (F) { /* TODO could be optimized for symmetric problems */
1419         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1420       } else {
1421         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1422         for (i=0;i<n_vertices;i++) {
1423           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr);
1424           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr);
1425           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1426           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1427           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1428         }
1429         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1430       }
1431       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1432       /* S_VV and S_CV */
1433       if (n_constraints) {
1434         Mat B;
1435 
1436         ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1437         for (i=0;i<n_vertices;i++) {
1438           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr);
1439           ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr);
1440           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1441           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1442           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1443           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1444         }
1445         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr);
1446         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1447         ierr = MatDestroy(&B);CHKERRQ(ierr);
1448         ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr);
1449         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1450         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1451         ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr);
1452         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
1453         ierr = MatDestroy(&B);CHKERRQ(ierr);
1454       }
1455       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1456       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1457         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1458       }
1459       if (lda_rhs != n_R) {
1460         ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1461         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1462         ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr);
1463       }
1464       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1465       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1466       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1467       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1468       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1469       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1470       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1471       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1472       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1473       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1474     } else {
1475       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1476     }
1477     for (i=0;i<pcbddc->benign_n;i++) {
1478       const PetscScalar *vals;
1479       const PetscInt    *idxs;
1480       PetscInt          n,j,primal_idx;
1481 
1482       ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1483       primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i;
1484       for (j=0;j<n;j++) {
1485         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j];
1486         coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j];
1487       }
1488       ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1489     }
1490     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1491 
1492     /* coarse basis functions */
1493     for (i=0;i<n_vertices;i++) {
1494       PetscScalar *y;
1495 
1496       ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr);
1497       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1498       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1499       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1500       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1501       y[n_B*i+idx_V_B[i]] = 1.0;
1502       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1503       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1504 
1505       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1506         PetscInt j;
1507 
1508         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1509         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1510         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1513         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1514         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1515       }
1516       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1517     }
1518     /* if n_R == 0 the object is not destroyed */
1519     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1520   }
1521 
1522   if (n_constraints) {
1523     Mat B;
1524 
1525     ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr);
1526     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1527     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1528     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1529     if (n_vertices) {
1530       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1531         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1532       } else {
1533         Mat S_VCt;
1534 
1535         if (lda_rhs != n_R) {
1536           ierr = MatDestroy(&B);CHKERRQ(ierr);
1537           ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr);
1538           ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr);
1539         }
1540         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1541         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1542         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1543       }
1544     }
1545     ierr = MatDestroy(&B);CHKERRQ(ierr);
1546     /* coarse basis functions */
1547     for (i=0;i<n_constraints;i++) {
1548       PetscScalar *y;
1549 
1550       ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr);
1551       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1552       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1553       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1556       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1557       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1558         PetscInt j;
1559 
1560         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1561         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1562         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1564         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1565         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1566         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1567       }
1568       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1569     }
1570   }
1571   if (n_constraints) {
1572     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1573   }
1574   ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr);
1575   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1576 
1577   /* compute other basis functions for non-symmetric problems */
1578   if (!pcbddc->symmetric_primal) {
1579     Mat         B_V=NULL,B_C=NULL;
1580     PetscScalar *marray;
1581 
1582     if (n_constraints) {
1583       Mat S_CCT,C_CRT;
1584 
1585       ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr);
1586       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1587       ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1588       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1589       if (n_vertices) {
1590         Mat S_VCT;
1591 
1592         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1593         ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1594         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1595       }
1596       ierr = MatDestroy(&C_CRT);CHKERRQ(ierr);
1597     }
1598     if (n_vertices && n_R) {
1599       PetscScalar    *av,*marray;
1600       const PetscInt *xadj,*adjncy;
1601       PetscInt       n;
1602       PetscBool      flg_row;
1603 
1604       /* B_V = B_V - A_VR^T */
1605       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1606       ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1607       ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr);
1608       ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr);
1609       for (i=0;i<n;i++) {
1610         PetscInt j;
1611         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
1612       }
1613       ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr);
1614       ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1615       ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1616     }
1617 
1618     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1619     ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr);
1620     for (i=0;i<n_vertices;i++) {
1621       ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr);
1622       ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1623       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1624       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1625       ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1626     }
1627     ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr);
1628     ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr);
1629     for (i=n_vertices;i<n_constraints+n_vertices;i++) {
1630       ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr);
1631       ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1632       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1633       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1634       ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1635     }
1636     ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr);
1637     /* coarse basis functions */
1638     for (i=0;i<pcbddc->local_primal_size;i++) {
1639       PetscScalar *y;
1640 
1641       ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1642       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1643       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1644       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646       if (i<n_vertices) {
1647         y[n_B*i+idx_V_B[i]] = 1.0;
1648       }
1649       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1650       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1651 
1652       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1653         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1654         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1655         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1658         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1659       }
1660       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1661     }
1662     ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1663     ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1664   }
1665   /* free memory */
1666   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1667   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1668   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1669   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1670   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1671   ierr = PetscFree(work);CHKERRQ(ierr);
1672   if (n_vertices) {
1673     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1674   }
1675   if (n_constraints) {
1676     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1677   }
1678   /* Checking coarse_sub_mat and coarse basis functios */
1679   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1680   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1681   if (pcbddc->dbg_flag) {
1682     Mat         coarse_sub_mat;
1683     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1684     Mat         coarse_phi_D,coarse_phi_B;
1685     Mat         coarse_psi_D,coarse_psi_B;
1686     Mat         A_II,A_BB,A_IB,A_BI;
1687     Mat         C_B,CPHI;
1688     IS          is_dummy;
1689     Vec         mones;
1690     MatType     checkmattype=MATSEQAIJ;
1691     PetscReal   real_value;
1692 
1693     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1694     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1695     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1696     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1697     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1698     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1699     if (!pcbddc->symmetric_primal) {
1700       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1701       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1702     }
1703     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1704 
1705     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1706     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1707     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1708     if (!pcbddc->symmetric_primal) {
1709       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1710       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1711       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1712       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1713       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1714       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1715       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1716       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1717       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1718       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1719       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1720       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1721     } else {
1722       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1723       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1724       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1725       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1726       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1727       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1728       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1729       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1730     }
1731     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1732     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1733     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1734     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1735     if (pcbddc->benign_n) {
1736       Mat         B0_I,B0_B,B0_BPHI,B0_IPHI;
1737       PetscScalar *data,*data2;
1738       PetscInt    j;
1739 
1740       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
1741       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1742       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1743       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1744       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1745       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1746       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1747       for (j=0;j<pcbddc->benign_n;j++) {
1748         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
1749         for (i=0;i<pcbddc->local_primal_size;i++) {
1750           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
1751           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
1752         }
1753       }
1754       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1755       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1756       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1757       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1758       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1759       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1760       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1761       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1762       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1763     }
1764 #if 0
1765   {
1766     PetscViewer viewer;
1767     char filename[256];
1768     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
1769     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1770     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1771     ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr);
1772     ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr);
1773     ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr);
1774     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1775     ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr);
1776     ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr);
1777     if (pcbddc->coarse_phi_D) {
1778       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr);
1779       ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr);
1780     }
1781     if (pcbddc->coarse_psi_B) {
1782       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr);
1783       ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr);
1784     }
1785     if (pcbddc->coarse_phi_B) {
1786       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr);
1787       ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr);
1788     }
1789     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1790   }
1791 #endif
1792     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1793     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1794     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
1795     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1796 
1797     /* check constraints */
1798     if (!pcbddc->benign_n) { /* TODO: add benign case */
1799       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1800       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1801       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1802       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1803       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1804       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1805       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1806       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1807       if (!pcbddc->symmetric_primal) {
1808         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1809         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1810         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1811         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1812         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1813       }
1814       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1815       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1816       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1817       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1818     }
1819     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1820     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1821     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1822     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1823     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1824     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1825     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1826     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1827     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1828     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1829     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1830     if (!pcbddc->symmetric_primal) {
1831       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1832       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1833     }
1834     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1835   }
1836   /* get back data */
1837   *coarse_submat_vals_n = coarse_submat_vals;
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1843 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1844 {
1845   Mat            *work_mat;
1846   IS             isrow_s,iscol_s;
1847   PetscBool      rsorted,csorted;
1848   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1849   PetscErrorCode ierr;
1850 
1851   PetscFunctionBegin;
1852   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1853   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1854   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1855   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1856 
1857   if (!rsorted) {
1858     const PetscInt *idxs;
1859     PetscInt *idxs_sorted,i;
1860 
1861     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1862     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1863     for (i=0;i<rsize;i++) {
1864       idxs_perm_r[i] = i;
1865     }
1866     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1867     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1868     for (i=0;i<rsize;i++) {
1869       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1870     }
1871     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1872     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1873   } else {
1874     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1875     isrow_s = isrow;
1876   }
1877 
1878   if (!csorted) {
1879     if (isrow == iscol) {
1880       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1881       iscol_s = isrow_s;
1882     } else {
1883       const PetscInt *idxs;
1884       PetscInt *idxs_sorted,i;
1885 
1886       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1887       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1888       for (i=0;i<csize;i++) {
1889         idxs_perm_c[i] = i;
1890       }
1891       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1892       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1893       for (i=0;i<csize;i++) {
1894         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1895       }
1896       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1897       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1898     }
1899   } else {
1900     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1901     iscol_s = iscol;
1902   }
1903 
1904   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1905 
1906   if (!rsorted || !csorted) {
1907     Mat      new_mat;
1908     IS       is_perm_r,is_perm_c;
1909 
1910     if (!rsorted) {
1911       PetscInt *idxs_r,i;
1912       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1913       for (i=0;i<rsize;i++) {
1914         idxs_r[idxs_perm_r[i]] = i;
1915       }
1916       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1917       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1918     } else {
1919       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1920     }
1921     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1922 
1923     if (!csorted) {
1924       if (isrow_s == iscol_s) {
1925         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1926         is_perm_c = is_perm_r;
1927       } else {
1928         PetscInt *idxs_c,i;
1929         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1930         for (i=0;i<csize;i++) {
1931           idxs_c[idxs_perm_c[i]] = i;
1932         }
1933         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1934         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1935       }
1936     } else {
1937       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1938     }
1939     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1940 
1941     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1942     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1943     work_mat[0] = new_mat;
1944     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1945     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1946   }
1947 
1948   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1949   *B = work_mat[0];
1950   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1951   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1952   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1958 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1959 {
1960   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1961   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1962   Mat            new_mat;
1963   IS             is_local,is_global;
1964   PetscInt       local_size;
1965   PetscBool      isseqaij;
1966   PetscErrorCode ierr;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1970   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1971   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1972   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1973   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1974   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1975   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1976 
1977   /* check */
1978   if (pcbddc->dbg_flag) {
1979     Vec       x,x_change;
1980     PetscReal error;
1981 
1982     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1983     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1984     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1985     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1986     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1987     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1988     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1989     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1990     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1991     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1992     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1993     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1994     ierr = VecDestroy(&x);CHKERRQ(ierr);
1995     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1996   }
1997 
1998   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1999   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
2000   if (isseqaij) {
2001     Mat M;
2002 
2003     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
2004     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
2005     ierr = MatDestroy(&M);CHKERRQ(ierr);
2006   } else {
2007     Mat work_mat,M;
2008 
2009     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
2010     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
2011     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
2012     ierr = MatDestroy(&M);CHKERRQ(ierr);
2013     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
2014   }
2015   if (matis->A->symmetric_set) {
2016     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
2017 #if !defined(PETSC_USE_COMPLEX)
2018     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
2019 #endif
2020   }
2021   /*
2022   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2023   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
2024   */
2025   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
2031 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
2032 {
2033   PC_IS*          pcis = (PC_IS*)(pc->data);
2034   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
2035   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2036   PetscInt        *idx_R_local=NULL;
2037   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
2038   PetscInt        vbs,bs;
2039   PetscBT         bitmask=NULL;
2040   PetscErrorCode  ierr;
2041 
2042   PetscFunctionBegin;
2043   /*
2044     No need to setup local scatters if
2045       - primal space is unchanged
2046         AND
2047       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
2048         AND
2049       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
2050   */
2051   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
2052     PetscFunctionReturn(0);
2053   }
2054   /* destroy old objects */
2055   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
2056   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
2057   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
2058   /* Set Non-overlapping dimensions */
2059   n_B = pcis->n_B;
2060   n_D = pcis->n - n_B;
2061   n_vertices = pcbddc->n_vertices;
2062 
2063   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
2064 
2065   /* create auxiliary bitmask and allocate workspace */
2066   if (!sub_schurs->reuse_mumps) {
2067     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
2068     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
2069     for (i=0;i<n_vertices;i++) {
2070       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
2071     }
2072 
2073     for (i=0, n_R=0; i<pcis->n; i++) {
2074       if (!PetscBTLookup(bitmask,i)) {
2075         idx_R_local[n_R++] = i;
2076       }
2077     }
2078   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
2079     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2080 
2081     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2082     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
2083   }
2084 
2085   /* Block code */
2086   vbs = 1;
2087   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
2088   if (bs>1 && !(n_vertices%bs)) {
2089     PetscBool is_blocked = PETSC_TRUE;
2090     PetscInt  *vary;
2091     if (!sub_schurs->reuse_mumps) {
2092       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
2093       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
2094       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
2095       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
2096       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
2097       for (i=0; i<pcis->n/bs; i++) {
2098         if (vary[i]!=0 && vary[i]!=bs) {
2099           is_blocked = PETSC_FALSE;
2100           break;
2101         }
2102       }
2103       ierr = PetscFree(vary);CHKERRQ(ierr);
2104     } else {
2105       /* Verify directly the R set */
2106       for (i=0; i<n_R/bs; i++) {
2107         PetscInt j,node=idx_R_local[bs*i];
2108         for (j=1; j<bs; j++) {
2109           if (node != idx_R_local[bs*i+j]-j) {
2110             is_blocked = PETSC_FALSE;
2111             break;
2112           }
2113         }
2114       }
2115     }
2116     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
2117       vbs = bs;
2118       for (i=0;i<n_R/vbs;i++) {
2119         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
2120       }
2121     }
2122   }
2123   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
2124   if (sub_schurs->reuse_mumps) {
2125     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2126 
2127     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2128     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
2129     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
2130     reuse_mumps->is_R = pcbddc->is_R_local;
2131   } else {
2132     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
2133   }
2134 
2135   /* print some info if requested */
2136   if (pcbddc->dbg_flag) {
2137     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2138     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2139     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2140     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
2141     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
2142     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr);
2143     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2144   }
2145 
2146   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
2147   if (!sub_schurs->reuse_mumps) {
2148     IS       is_aux1,is_aux2;
2149     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
2150 
2151     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2152     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
2153     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
2154     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2155     for (i=0; i<n_D; i++) {
2156       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
2157     }
2158     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2159     for (i=0, j=0; i<n_R; i++) {
2160       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
2161         aux_array1[j++] = i;
2162       }
2163     }
2164     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2165     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2166     for (i=0, j=0; i<n_B; i++) {
2167       if (!PetscBTLookup(bitmask,is_indices[i])) {
2168         aux_array2[j++] = i;
2169       }
2170     }
2171     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2172     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
2173     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
2174     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2175     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
2176 
2177     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2178       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
2179       for (i=0, j=0; i<n_R; i++) {
2180         if (PetscBTLookup(bitmask,idx_R_local[i])) {
2181           aux_array1[j++] = i;
2182         }
2183       }
2184       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2185       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2186       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2187     }
2188     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
2189     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2190   } else {
2191     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2192     IS               tis;
2193     PetscInt         schur_size;
2194 
2195     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
2196     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
2197     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
2198     ierr = ISDestroy(&tis);CHKERRQ(ierr);
2199     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2200       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
2201       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2202       ierr = ISDestroy(&tis);CHKERRQ(ierr);
2203     }
2204   }
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
2211 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
2212 {
2213   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2214   PC_IS          *pcis = (PC_IS*)pc->data;
2215   PC             pc_temp;
2216   Mat            A_RR;
2217   MatReuse       reuse;
2218   PetscScalar    m_one = -1.0;
2219   PetscReal      value;
2220   PetscInt       n_D,n_R;
2221   PetscBool      use_exact,use_exact_reduced,issbaij;
2222   PetscErrorCode ierr;
2223   /* prefixes stuff */
2224   char           dir_prefix[256],neu_prefix[256],str_level[16];
2225   size_t         len;
2226 
2227   PetscFunctionBegin;
2228 
2229   /* compute prefixes */
2230   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
2231   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
2232   if (!pcbddc->current_level) {
2233     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2234     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2235     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2236     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2237   } else {
2238     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2239     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
2240     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2241     len -= 15; /* remove "pc_bddc_coarse_" */
2242     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
2243     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
2244     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2245     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2246     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2247     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2248     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
2249     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
2250   }
2251 
2252   /* DIRICHLET PROBLEM */
2253   if (dirichlet) {
2254     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2255     if (pcbddc->local_mat->symmetric_set) {
2256       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2257     }
2258     /* Matrix for Dirichlet problem is pcis->A_II */
2259     n_D = pcis->n - pcis->n_B;
2260     if (!pcbddc->ksp_D) { /* create object if not yet build */
2261       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
2262       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
2263       /* default */
2264       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
2265       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
2266       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2267       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2268       if (issbaij) {
2269         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2270       } else {
2271         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2272       }
2273       /* Allow user's customization */
2274       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
2275       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2276     }
2277     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
2278     if (sub_schurs->reuse_mumps) {
2279       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2280 
2281       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
2282     }
2283     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2284     if (!n_D) {
2285       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2286       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2287     }
2288     /* Set Up KSP for Dirichlet problem of BDDC */
2289     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
2290     /* set ksp_D into pcis data */
2291     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
2292     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
2293     pcis->ksp_D = pcbddc->ksp_D;
2294   }
2295 
2296   /* NEUMANN PROBLEM */
2297   A_RR = 0;
2298   if (neumann) {
2299     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2300     PetscInt        ibs,mbs;
2301     PetscBool       issbaij;
2302     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
2303     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
2304     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
2305     if (pcbddc->ksp_R) { /* already created ksp */
2306       PetscInt nn_R;
2307       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
2308       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2309       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
2310       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
2311         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
2312         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2313         reuse = MAT_INITIAL_MATRIX;
2314       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
2315         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
2316           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2317           reuse = MAT_INITIAL_MATRIX;
2318         } else { /* safe to reuse the matrix */
2319           reuse = MAT_REUSE_MATRIX;
2320         }
2321       }
2322       /* last check */
2323       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
2324         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2325         reuse = MAT_INITIAL_MATRIX;
2326       }
2327     } else { /* first time, so we need to create the matrix */
2328       reuse = MAT_INITIAL_MATRIX;
2329     }
2330     /* extract A_RR */
2331     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
2332     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
2333     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2334     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
2335       if (matis->A == pcbddc->local_mat) {
2336         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2337         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2338       } else {
2339         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2340       }
2341     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
2342       if (matis->A == pcbddc->local_mat) {
2343         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2344         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2345       } else {
2346         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2347       }
2348     }
2349     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
2350     if (pcbddc->local_mat->symmetric_set) {
2351       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2352     }
2353     if (!pcbddc->ksp_R) { /* create object if not present */
2354       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
2355       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
2356       /* default */
2357       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
2358       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
2359       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2360       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2361       if (issbaij) {
2362         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2363       } else {
2364         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2365       }
2366       /* Allow user's customization */
2367       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
2368       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2369     }
2370     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2371     if (!n_R) {
2372       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2373       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2374     }
2375     /* Reuse MUMPS solver if it is present */
2376     if (sub_schurs->reuse_mumps) {
2377       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2378 
2379       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2380     }
2381     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2382     /* Set Up KSP for Neumann problem of BDDC */
2383     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2384   }
2385   /* free Neumann problem's matrix */
2386   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2387 
2388   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2389   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2390     if (pcbddc->dbg_flag) {
2391       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2392       ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2393       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2394     }
2395     if (dirichlet) { /* Dirichlet */
2396       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2397       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2398       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2399       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2400       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2401       /* need to be adapted? */
2402       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2403       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2404       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2405       /* print info */
2406       if (pcbddc->dbg_flag) {
2407         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2408         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2409       }
2410       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2411         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2412       }
2413     }
2414     if (neumann) { /* Neumann */
2415       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2416       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2417       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2418       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2419       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2420       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2421       /* need to be adapted? */
2422       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2423       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2424       /* print info */
2425       if (pcbddc->dbg_flag) {
2426         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2427         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2428       }
2429       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2430         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2431       }
2432     }
2433   }
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2439 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2440 {
2441   PetscErrorCode  ierr;
2442   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2443   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2444 
2445   PetscFunctionBegin;
2446   if (!sub_schurs->reuse_mumps) {
2447     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2448   }
2449   if (!pcbddc->switch_static) {
2450     if (applytranspose && pcbddc->local_auxmat1) {
2451       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2452       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2453     }
2454     if (!sub_schurs->reuse_mumps) {
2455       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2456       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2457     } else {
2458       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2459 
2460       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2461       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2462     }
2463   } else {
2464     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2465     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2466     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2467     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2468     if (applytranspose && pcbddc->local_auxmat1) {
2469       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2470       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2471       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2472       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2473     }
2474   }
2475   if (!sub_schurs->reuse_mumps) {
2476     if (applytranspose) {
2477       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2478     } else {
2479       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2480     }
2481   } else {
2482     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2483 
2484     if (applytranspose) {
2485       ierr = MatFactorSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2486     } else {
2487       ierr = MatFactorSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2488     }
2489   }
2490   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2491   if (!pcbddc->switch_static) {
2492     if (!sub_schurs->reuse_mumps) {
2493       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2494       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2495     } else {
2496       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2497 
2498       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2499       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2500     }
2501     if (!applytranspose && pcbddc->local_auxmat1) {
2502       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2503       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2504     }
2505   } else {
2506     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2507     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2508     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2509     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2510     if (!applytranspose && pcbddc->local_auxmat1) {
2511       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2512       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2513     }
2514     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2515     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2516     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2517     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2518   }
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2523 #undef __FUNCT__
2524 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2525 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2526 {
2527   PetscErrorCode ierr;
2528   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2529   PC_IS*            pcis = (PC_IS*)  (pc->data);
2530   const PetscScalar zero = 0.0;
2531 
2532   PetscFunctionBegin;
2533   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2534   if (applytranspose) {
2535     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2536     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2537   } else {
2538     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2539     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2540   }
2541 
2542   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2543   if (pcbddc->benign_n) {
2544     PetscScalar *array;
2545     PetscInt    j;
2546 
2547     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2548     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
2549     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2550   }
2551 
2552   /* start communications from local primal nodes to rhs of coarse solver */
2553   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2554   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2555   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2556 
2557   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2558   /* TODO remove null space when doing multilevel */
2559   if (pcbddc->coarse_ksp) {
2560     Mat coarse_mat;
2561     Vec rhs,sol;
2562     MatNullSpace nullsp;
2563 
2564     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2565     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2566     ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
2567     ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr);
2568     if (nullsp) {
2569       ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr);
2570     }
2571     if (applytranspose) {
2572       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2573     } else {
2574       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2575     }
2576     if (nullsp) {
2577       ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr);
2578     }
2579   }
2580 
2581   /* Local solution on R nodes */
2582   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2583     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2584   }
2585 
2586   /* communications from coarse sol to local primal nodes */
2587   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2588   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2589 
2590   /* Sum contributions from two levels */
2591   if (applytranspose) {
2592     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2593     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2594   } else {
2595     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2596     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2597   }
2598   /* store p0 */
2599   if (pcbddc->benign_n) {
2600     PetscScalar *array;
2601     PetscInt    j;
2602 
2603     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2604     for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
2605     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2606   }
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 #undef __FUNCT__
2611 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2612 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2613 {
2614   PetscErrorCode ierr;
2615   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2616   PetscScalar    *array;
2617   Vec            from,to;
2618 
2619   PetscFunctionBegin;
2620   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2621     from = pcbddc->coarse_vec;
2622     to = pcbddc->vec1_P;
2623     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2624       Vec tvec;
2625 
2626       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2627       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2628       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2629       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2630       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2631       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2632     }
2633   } else { /* from local to global -> put data in coarse right hand side */
2634     from = pcbddc->vec1_P;
2635     to = pcbddc->coarse_vec;
2636   }
2637   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2638   PetscFunctionReturn(0);
2639 }
2640 
2641 #undef __FUNCT__
2642 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2643 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2644 {
2645   PetscErrorCode ierr;
2646   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2647   PetscScalar    *array;
2648   Vec            from,to;
2649 
2650   PetscFunctionBegin;
2651   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2652     from = pcbddc->coarse_vec;
2653     to = pcbddc->vec1_P;
2654   } else { /* from local to global -> put data in coarse right hand side */
2655     from = pcbddc->vec1_P;
2656     to = pcbddc->coarse_vec;
2657   }
2658   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2659   if (smode == SCATTER_FORWARD) {
2660     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2661       Vec tvec;
2662 
2663       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2664       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2665       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2666       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2667     }
2668   } else {
2669     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2670      ierr = VecResetArray(from);CHKERRQ(ierr);
2671     }
2672   }
2673   PetscFunctionReturn(0);
2674 }
2675 
2676 /* uncomment for testing purposes */
2677 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2678 #undef __FUNCT__
2679 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2680 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2681 {
2682   PetscErrorCode    ierr;
2683   PC_IS*            pcis = (PC_IS*)(pc->data);
2684   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2685   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2686   /* one and zero */
2687   PetscScalar       one=1.0,zero=0.0;
2688   /* space to store constraints and their local indices */
2689   PetscScalar       *constraints_data;
2690   PetscInt          *constraints_idxs,*constraints_idxs_B;
2691   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2692   PetscInt          *constraints_n;
2693   /* iterators */
2694   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2695   /* BLAS integers */
2696   PetscBLASInt      lwork,lierr;
2697   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2698   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2699   /* reuse */
2700   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2701   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2702   /* change of basis */
2703   PetscBool         qr_needed;
2704   PetscBT           change_basis,qr_needed_idx;
2705   /* auxiliary stuff */
2706   PetscInt          *nnz,*is_indices;
2707   PetscInt          ncc;
2708   /* some quantities */
2709   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2710   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2711 
2712   PetscFunctionBegin;
2713   /* Destroy Mat objects computed previously */
2714   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2715   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2716   /* save info on constraints from previous setup (if any) */
2717   olocal_primal_size = pcbddc->local_primal_size;
2718   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2719   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2720   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2721   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2722   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2723   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2724 
2725   /* print some info */
2726   if (pcbddc->dbg_flag) {
2727     IS       vertices;
2728     PetscInt nv,nedges,nfaces;
2729     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2730     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2731     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2732     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2733     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2734     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2735     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2736     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2737     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2738     ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
2739   }
2740 
2741   if (!pcbddc->adaptive_selection) {
2742     IS           ISForVertices,*ISForFaces,*ISForEdges;
2743     MatNullSpace nearnullsp;
2744     const Vec    *nearnullvecs;
2745     Vec          *localnearnullsp;
2746     PetscScalar  *array;
2747     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2748     PetscBool    nnsp_has_cnst;
2749     /* LAPACK working arrays for SVD or POD */
2750     PetscBool    skip_lapack,boolforchange;
2751     PetscScalar  *work;
2752     PetscReal    *singular_vals;
2753 #if defined(PETSC_USE_COMPLEX)
2754     PetscReal    *rwork;
2755 #endif
2756 #if defined(PETSC_MISSING_LAPACK_GESVD)
2757     PetscScalar  *temp_basis,*correlation_mat;
2758 #else
2759     PetscBLASInt dummy_int=1;
2760     PetscScalar  dummy_scalar=1.;
2761 #endif
2762 
2763     /* Get index sets for faces, edges and vertices from graph */
2764     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2765     /* free unneeded index sets */
2766     if (!pcbddc->use_vertices) {
2767       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2768     }
2769     if (!pcbddc->use_edges) {
2770       for (i=0;i<n_ISForEdges;i++) {
2771         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2772       }
2773       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2774       n_ISForEdges = 0;
2775     }
2776     if (!pcbddc->use_faces) {
2777       for (i=0;i<n_ISForFaces;i++) {
2778         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2779       }
2780       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2781       n_ISForFaces = 0;
2782     }
2783 
2784 #if defined(PETSC_USE_DEBUG)
2785     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2786        Also use_change_of_basis should be consistent among processors */
2787     if (pcbddc->NullSpace) {
2788       PetscBool tbool[2],gbool[2];
2789 
2790       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2791         pcbddc->use_change_of_basis = PETSC_TRUE;
2792         if (!ISForEdges) {
2793           pcbddc->use_change_on_faces = PETSC_TRUE;
2794         }
2795       }
2796       tbool[0] = pcbddc->use_change_of_basis;
2797       tbool[1] = pcbddc->use_change_on_faces;
2798       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2799       pcbddc->use_change_of_basis = gbool[0];
2800       pcbddc->use_change_on_faces = gbool[1];
2801     }
2802 #endif
2803 
2804     /* check if near null space is attached to global mat */
2805     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2806     if (nearnullsp) {
2807       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2808       /* remove any stored info */
2809       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2810       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2811       /* store information for BDDC solver reuse */
2812       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2813       pcbddc->onearnullspace = nearnullsp;
2814       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2815       for (i=0;i<nnsp_size;i++) {
2816         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2817       }
2818     } else { /* if near null space is not provided BDDC uses constants by default */
2819       nnsp_size = 0;
2820       nnsp_has_cnst = PETSC_TRUE;
2821     }
2822     /* get max number of constraints on a single cc */
2823     max_constraints = nnsp_size;
2824     if (nnsp_has_cnst) max_constraints++;
2825 
2826     /*
2827          Evaluate maximum storage size needed by the procedure
2828          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2829          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2830          There can be multiple constraints per connected component
2831                                                                                                                                                            */
2832     n_vertices = 0;
2833     if (ISForVertices) {
2834       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2835     }
2836     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2837     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2838 
2839     total_counts = n_ISForFaces+n_ISForEdges;
2840     total_counts *= max_constraints;
2841     total_counts += n_vertices;
2842     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2843 
2844     total_counts = 0;
2845     max_size_of_constraint = 0;
2846     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2847       IS used_is;
2848       if (i<n_ISForEdges) {
2849         used_is = ISForEdges[i];
2850       } else {
2851         used_is = ISForFaces[i-n_ISForEdges];
2852       }
2853       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2854       total_counts += j;
2855       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2856     }
2857     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2858 
2859     /* get local part of global near null space vectors */
2860     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2861     for (k=0;k<nnsp_size;k++) {
2862       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2863       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2864       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2865     }
2866 
2867     /* whether or not to skip lapack calls */
2868     skip_lapack = PETSC_TRUE;
2869     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2870 
2871     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2872     if (!skip_lapack) {
2873       PetscScalar temp_work;
2874 
2875 #if defined(PETSC_MISSING_LAPACK_GESVD)
2876       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2877       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2878       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2879       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2880 #if defined(PETSC_USE_COMPLEX)
2881       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2882 #endif
2883       /* now we evaluate the optimal workspace using query with lwork=-1 */
2884       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2885       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2886       lwork = -1;
2887       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2888 #if !defined(PETSC_USE_COMPLEX)
2889       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2890 #else
2891       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2892 #endif
2893       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2894       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2895 #else /* on missing GESVD */
2896       /* SVD */
2897       PetscInt max_n,min_n;
2898       max_n = max_size_of_constraint;
2899       min_n = max_constraints;
2900       if (max_size_of_constraint < max_constraints) {
2901         min_n = max_size_of_constraint;
2902         max_n = max_constraints;
2903       }
2904       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2905 #if defined(PETSC_USE_COMPLEX)
2906       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2907 #endif
2908       /* now we evaluate the optimal workspace using query with lwork=-1 */
2909       lwork = -1;
2910       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2911       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2912       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2913       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2914 #if !defined(PETSC_USE_COMPLEX)
2915       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2916 #else
2917       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2918 #endif
2919       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2920       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2921 #endif /* on missing GESVD */
2922       /* Allocate optimal workspace */
2923       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2924       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2925     }
2926     /* Now we can loop on constraining sets */
2927     total_counts = 0;
2928     constraints_idxs_ptr[0] = 0;
2929     constraints_data_ptr[0] = 0;
2930     /* vertices */
2931     if (n_vertices) {
2932       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2933       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2934       for (i=0;i<n_vertices;i++) {
2935         constraints_n[total_counts] = 1;
2936         constraints_data[total_counts] = 1.0;
2937         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2938         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2939         total_counts++;
2940       }
2941       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2942       n_vertices = total_counts;
2943     }
2944 
2945     /* edges and faces */
2946     total_counts_cc = total_counts;
2947     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2948       IS        used_is;
2949       PetscBool idxs_copied = PETSC_FALSE;
2950 
2951       if (ncc<n_ISForEdges) {
2952         used_is = ISForEdges[ncc];
2953         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2954       } else {
2955         used_is = ISForFaces[ncc-n_ISForEdges];
2956         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2957       }
2958       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2959 
2960       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2961       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2962       /* change of basis should not be performed on local periodic nodes */
2963       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2964       if (nnsp_has_cnst) {
2965         PetscScalar quad_value;
2966 
2967         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2968         idxs_copied = PETSC_TRUE;
2969 
2970         if (!pcbddc->use_nnsp_true) {
2971           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2972         } else {
2973           quad_value = 1.0;
2974         }
2975         for (j=0;j<size_of_constraint;j++) {
2976           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2977         }
2978         temp_constraints++;
2979         total_counts++;
2980       }
2981       for (k=0;k<nnsp_size;k++) {
2982         PetscReal real_value;
2983         PetscScalar *ptr_to_data;
2984 
2985         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2986         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2987         for (j=0;j<size_of_constraint;j++) {
2988           ptr_to_data[j] = array[is_indices[j]];
2989         }
2990         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2991         /* check if array is null on the connected component */
2992         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2993         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2994         if (real_value > 0.0) { /* keep indices and values */
2995           temp_constraints++;
2996           total_counts++;
2997           if (!idxs_copied) {
2998             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2999             idxs_copied = PETSC_TRUE;
3000           }
3001         }
3002       }
3003       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3004       valid_constraints = temp_constraints;
3005       if (!pcbddc->use_nnsp_true && temp_constraints) {
3006         if (temp_constraints == 1) { /* just normalize the constraint */
3007           PetscScalar norm,*ptr_to_data;
3008 
3009           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
3010           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3011           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
3012           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
3013           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
3014         } else { /* perform SVD */
3015           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
3016           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
3017 
3018 #if defined(PETSC_MISSING_LAPACK_GESVD)
3019           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
3020              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
3021              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
3022                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
3023                 from that computed using LAPACKgesvd
3024              -> This is due to a different computation of eigenvectors in LAPACKheev
3025              -> The quality of the POD-computed basis will be the same */
3026           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
3027           /* Store upper triangular part of correlation matrix */
3028           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3029           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3030           for (j=0;j<temp_constraints;j++) {
3031             for (k=0;k<j+1;k++) {
3032               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
3033             }
3034           }
3035           /* compute eigenvalues and eigenvectors of correlation matrix */
3036           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3037           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
3038 #if !defined(PETSC_USE_COMPLEX)
3039           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
3040 #else
3041           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
3042 #endif
3043           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3044           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
3045           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
3046           j = 0;
3047           while (j < temp_constraints && singular_vals[j] < tol) j++;
3048           total_counts = total_counts-j;
3049           valid_constraints = temp_constraints-j;
3050           /* scale and copy POD basis into used quadrature memory */
3051           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3052           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3053           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
3054           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3055           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
3056           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3057           if (j<temp_constraints) {
3058             PetscInt ii;
3059             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
3060             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3061             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
3062             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3063             for (k=0;k<temp_constraints-j;k++) {
3064               for (ii=0;ii<size_of_constraint;ii++) {
3065                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
3066               }
3067             }
3068           }
3069 #else  /* on missing GESVD */
3070           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3071           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3072           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3073           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3074 #if !defined(PETSC_USE_COMPLEX)
3075           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
3076 #else
3077           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
3078 #endif
3079           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
3080           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3081           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
3082           k = temp_constraints;
3083           if (k > size_of_constraint) k = size_of_constraint;
3084           j = 0;
3085           while (j < k && singular_vals[k-j-1] < tol) j++;
3086           valid_constraints = k-j;
3087           total_counts = total_counts-temp_constraints+valid_constraints;
3088 #endif /* on missing GESVD */
3089         }
3090       }
3091       /* update pointers information */
3092       if (valid_constraints) {
3093         constraints_n[total_counts_cc] = valid_constraints;
3094         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
3095         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
3096         /* set change_of_basis flag */
3097         if (boolforchange) {
3098           PetscBTSet(change_basis,total_counts_cc);
3099         }
3100         total_counts_cc++;
3101       }
3102     }
3103     /* free workspace */
3104     if (!skip_lapack) {
3105       ierr = PetscFree(work);CHKERRQ(ierr);
3106 #if defined(PETSC_USE_COMPLEX)
3107       ierr = PetscFree(rwork);CHKERRQ(ierr);
3108 #endif
3109       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
3110 #if defined(PETSC_MISSING_LAPACK_GESVD)
3111       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
3112       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
3113 #endif
3114     }
3115     for (k=0;k<nnsp_size;k++) {
3116       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
3117     }
3118     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
3119     /* free index sets of faces, edges and vertices */
3120     for (i=0;i<n_ISForFaces;i++) {
3121       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
3122     }
3123     if (n_ISForFaces) {
3124       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
3125     }
3126     for (i=0;i<n_ISForEdges;i++) {
3127       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
3128     }
3129     if (n_ISForEdges) {
3130       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
3131     }
3132     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
3133   } else {
3134     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3135 
3136     total_counts = 0;
3137     n_vertices = 0;
3138     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3139       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
3140     }
3141     max_constraints = 0;
3142     total_counts_cc = 0;
3143     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3144       total_counts += pcbddc->adaptive_constraints_n[i];
3145       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
3146       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
3147     }
3148     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
3149     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
3150     constraints_idxs = pcbddc->adaptive_constraints_idxs;
3151     constraints_data = pcbddc->adaptive_constraints_data;
3152     /* constraints_n differs from pcbddc->adaptive_constraints_n */
3153     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
3154     total_counts_cc = 0;
3155     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3156       if (pcbddc->adaptive_constraints_n[i]) {
3157         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
3158       }
3159     }
3160 #if 0
3161     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
3162     for (i=0;i<total_counts_cc;i++) {
3163       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
3164       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
3165       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
3166         printf(" %d",constraints_idxs[j]);
3167       }
3168       printf("\n");
3169       printf("number of cc: %d\n",constraints_n[i]);
3170     }
3171     for (i=0;i<n_vertices;i++) {
3172       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
3173     }
3174     for (i=0;i<sub_schurs->n_subs;i++) {
3175       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
3176     }
3177 #endif
3178 
3179     max_size_of_constraint = 0;
3180     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
3181     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
3182     /* Change of basis */
3183     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
3184     if (pcbddc->use_change_of_basis) {
3185       for (i=0;i<sub_schurs->n_subs;i++) {
3186         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
3187           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
3188         }
3189       }
3190     }
3191   }
3192   pcbddc->local_primal_size = total_counts;
3193   ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3194 
3195   /* map constraints_idxs in boundary numbering */
3196   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
3197   if (i != constraints_idxs_ptr[total_counts_cc]) {
3198     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
3199   }
3200 
3201   /* Create constraint matrix */
3202   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
3203   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
3204   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
3205 
3206   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
3207   /* determine if a QR strategy is needed for change of basis */
3208   qr_needed = PETSC_FALSE;
3209   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
3210   total_primal_vertices=0;
3211   pcbddc->local_primal_size_cc = 0;
3212   for (i=0;i<total_counts_cc;i++) {
3213     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3214     if (size_of_constraint == 1) {
3215       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
3216       pcbddc->local_primal_size_cc += 1;
3217     } else if (PetscBTLookup(change_basis,i)) {
3218       for (k=0;k<constraints_n[i];k++) {
3219         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3220       }
3221       pcbddc->local_primal_size_cc += constraints_n[i];
3222       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
3223         PetscBTSet(qr_needed_idx,i);
3224         qr_needed = PETSC_TRUE;
3225       }
3226     } else {
3227       pcbddc->local_primal_size_cc += 1;
3228     }
3229   }
3230   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
3231   pcbddc->n_vertices = total_primal_vertices;
3232   /* permute indices in order to have a sorted set of vertices */
3233   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3234 
3235   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
3236   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
3237   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
3238 
3239   /* nonzero structure of constraint matrix */
3240   /* and get reference dof for local constraints */
3241   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
3242   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
3243 
3244   j = total_primal_vertices;
3245   total_counts = total_primal_vertices;
3246   cum = total_primal_vertices;
3247   for (i=n_vertices;i<total_counts_cc;i++) {
3248     if (!PetscBTLookup(change_basis,i)) {
3249       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
3250       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
3251       cum++;
3252       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3253       for (k=0;k<constraints_n[i];k++) {
3254         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3255         nnz[j+k] = size_of_constraint;
3256       }
3257       j += constraints_n[i];
3258     }
3259   }
3260   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
3261   ierr = PetscFree(nnz);CHKERRQ(ierr);
3262 
3263   /* set values in constraint matrix */
3264   for (i=0;i<total_primal_vertices;i++) {
3265     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3266   }
3267   total_counts = total_primal_vertices;
3268   for (i=n_vertices;i<total_counts_cc;i++) {
3269     if (!PetscBTLookup(change_basis,i)) {
3270       PetscInt *cols;
3271 
3272       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3273       cols = constraints_idxs+constraints_idxs_ptr[i];
3274       for (k=0;k<constraints_n[i];k++) {
3275         PetscInt    row = total_counts+k;
3276         PetscScalar *vals;
3277 
3278         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
3279         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3280       }
3281       total_counts += constraints_n[i];
3282     }
3283   }
3284   /* assembling */
3285   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3287 
3288   /*
3289   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3290   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
3291   */
3292   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
3293   if (pcbddc->use_change_of_basis) {
3294     /* dual and primal dofs on a single cc */
3295     PetscInt     dual_dofs,primal_dofs;
3296     /* working stuff for GEQRF */
3297     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
3298     PetscBLASInt lqr_work;
3299     /* working stuff for UNGQR */
3300     PetscScalar  *gqr_work,lgqr_work_t;
3301     PetscBLASInt lgqr_work;
3302     /* working stuff for TRTRS */
3303     PetscScalar  *trs_rhs;
3304     PetscBLASInt Blas_NRHS;
3305     /* pointers for values insertion into change of basis matrix */
3306     PetscInt     *start_rows,*start_cols;
3307     PetscScalar  *start_vals;
3308     /* working stuff for values insertion */
3309     PetscBT      is_primal;
3310     PetscInt     *aux_primal_numbering_B;
3311     /* matrix sizes */
3312     PetscInt     global_size,local_size;
3313     /* temporary change of basis */
3314     Mat          localChangeOfBasisMatrix;
3315     /* extra space for debugging */
3316     PetscScalar  *dbg_work;
3317 
3318     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
3319     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
3320     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3321     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
3322     /* nonzeros for local mat */
3323     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
3324     for (i=0;i<pcis->n;i++) nnz[i]=1;
3325     for (i=n_vertices;i<total_counts_cc;i++) {
3326       if (PetscBTLookup(change_basis,i)) {
3327         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3328         if (PetscBTLookup(qr_needed_idx,i)) {
3329           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
3330         } else {
3331           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
3332           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
3333         }
3334       }
3335     }
3336     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
3337     ierr = PetscFree(nnz);CHKERRQ(ierr);
3338     /* Set initial identity in the matrix */
3339     for (i=0;i<pcis->n;i++) {
3340       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
3341     }
3342 
3343     if (pcbddc->dbg_flag) {
3344       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
3345       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3346     }
3347 
3348 
3349     /* Now we loop on the constraints which need a change of basis */
3350     /*
3351        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
3352        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
3353 
3354        Basic blocks of change of basis matrix T computed by
3355 
3356           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
3357 
3358             | 1        0   ...        0         s_1/S |
3359             | 0        1   ...        0         s_2/S |
3360             |              ...                        |
3361             | 0        ...            1     s_{n-1}/S |
3362             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
3363 
3364             with S = \sum_{i=1}^n s_i^2
3365             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
3366                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
3367 
3368           - QR decomposition of constraints otherwise
3369     */
3370     if (qr_needed) {
3371       /* space to store Q */
3372       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
3373       /* first we issue queries for optimal work */
3374       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3375       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
3376       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3377       lqr_work = -1;
3378       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
3379       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3380       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3381       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3382       lgqr_work = -1;
3383       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3384       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3385       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3386       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3387       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3388       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3389       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3390       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3391       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3392       /* array to store scaling factors for reflectors */
3393       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3394       /* array to store rhs and solution of triangular solver */
3395       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3396       /* allocating workspace for check */
3397       if (pcbddc->dbg_flag) {
3398         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3399       }
3400     }
3401     /* array to store whether a node is primal or not */
3402     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3403     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3404     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3405     if (i != total_primal_vertices) {
3406       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3407     }
3408     for (i=0;i<total_primal_vertices;i++) {
3409       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3410     }
3411     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3412 
3413     /* loop on constraints and see whether or not they need a change of basis and compute it */
3414     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3415       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3416       if (PetscBTLookup(change_basis,total_counts)) {
3417         /* get constraint info */
3418         primal_dofs = constraints_n[total_counts];
3419         dual_dofs = size_of_constraint-primal_dofs;
3420 
3421         if (pcbddc->dbg_flag) {
3422           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3423         }
3424 
3425         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3426 
3427           /* copy quadrature constraints for change of basis check */
3428           if (pcbddc->dbg_flag) {
3429             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3430           }
3431           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3432           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3433 
3434           /* compute QR decomposition of constraints */
3435           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3436           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3437           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3438           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3439           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3440           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3441           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3442 
3443           /* explictly compute R^-T */
3444           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3445           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3446           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3447           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3448           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3449           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3450           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3451           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3452           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3453           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3454 
3455           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3456           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3457           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3458           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3459           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3460           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3461           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3462           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3463           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3464 
3465           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3466              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3467              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3468           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3469           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3470           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3471           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3472           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3473           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3474           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3475           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3476           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3477           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3478 
3479           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3480           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3481           /* insert cols for primal dofs */
3482           for (j=0;j<primal_dofs;j++) {
3483             start_vals = &qr_basis[j*size_of_constraint];
3484             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3485             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3486           }
3487           /* insert cols for dual dofs */
3488           for (j=0,k=0;j<dual_dofs;k++) {
3489             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3490               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3491               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3492               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3493               j++;
3494             }
3495           }
3496 
3497           /* check change of basis */
3498           if (pcbddc->dbg_flag) {
3499             PetscInt   ii,jj;
3500             PetscBool valid_qr=PETSC_TRUE;
3501             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3502             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3503             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3504             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3505             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3506             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3507             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3508             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3509             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3510             for (jj=0;jj<size_of_constraint;jj++) {
3511               for (ii=0;ii<primal_dofs;ii++) {
3512                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3513                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3514               }
3515             }
3516             if (!valid_qr) {
3517               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3518               for (jj=0;jj<size_of_constraint;jj++) {
3519                 for (ii=0;ii<primal_dofs;ii++) {
3520                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3521                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3522                   }
3523                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3524                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3525                   }
3526                 }
3527               }
3528             } else {
3529               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3530             }
3531           }
3532         } else { /* simple transformation block */
3533           PetscInt    row,col;
3534           PetscScalar val,norm;
3535 
3536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3537           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3538           for (j=0;j<size_of_constraint;j++) {
3539             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3540             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3541             if (!PetscBTLookup(is_primal,row_B)) {
3542               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3543               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3544               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3545             } else {
3546               for (k=0;k<size_of_constraint;k++) {
3547                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3548                 if (row != col) {
3549                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3550                 } else {
3551                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3552                 }
3553                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3554               }
3555             }
3556           }
3557           if (pcbddc->dbg_flag) {
3558             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3559           }
3560         }
3561       } else {
3562         if (pcbddc->dbg_flag) {
3563           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3564         }
3565       }
3566     }
3567 
3568     /* free workspace */
3569     if (qr_needed) {
3570       if (pcbddc->dbg_flag) {
3571         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3572       }
3573       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3574       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3575       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3576       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3577       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3578     }
3579     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3580     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3581     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3582 
3583     /* assembling of global change of variable */
3584     {
3585       Mat      tmat;
3586       PetscInt bs;
3587 
3588       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3589       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3590       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3591       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3592       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3593       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3594       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3595       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3596       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3597       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3598       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3599       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3600       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3601       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3602       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3603       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3604       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3605       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3606     }
3607     /* check */
3608     if (pcbddc->dbg_flag) {
3609       PetscReal error;
3610       Vec       x,x_change;
3611 
3612       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3613       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3614       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3615       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3616       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3617       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3618       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3619       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3620       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3621       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3622       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3623       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3624       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3625       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3626       ierr = VecDestroy(&x);CHKERRQ(ierr);
3627       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3628     }
3629 
3630     /* adapt sub_schurs computed (if any) */
3631     if (pcbddc->use_deluxe_scaling) {
3632       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3633       if (sub_schurs->S_Ej_all) {
3634         Mat                    S_new,tmat;
3635         ISLocalToGlobalMapping NtoSall;
3636         IS                     is_all_N,is_V,is_V_Sall;
3637         const PetscScalar      *array;
3638         const PetscInt         *idxs_V,*idxs_all;
3639         PetscInt               i,n_V;
3640 
3641         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3642         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3643         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3644         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3645         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3646         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3647         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3648         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3649         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3650         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3651         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3652         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3653         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3654         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3655         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3656         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3657         for (i=0;i<n_V;i++) {
3658           PetscScalar val;
3659           PetscInt    idx;
3660 
3661           idx = idxs_V[i];
3662           val = array[idxs_all[idxs_V[i]]];
3663           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3664         }
3665         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3666         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3667         sub_schurs->S_Ej_all = S_new;
3668         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3669         if (sub_schurs->sum_S_Ej_all) {
3670           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3671           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3672           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3673           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3674           sub_schurs->sum_S_Ej_all = S_new;
3675           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3676         }
3677         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3678         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3679         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3680         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3681         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3682       }
3683     }
3684     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3685   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3686     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3687     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3688   }
3689 
3690   /* set up change of basis context */
3691   if (pcbddc->ChangeOfBasisMatrix) {
3692     PCBDDCChange_ctx change_ctx;
3693 
3694     if (!pcbddc->new_global_mat) {
3695       PetscInt global_size,local_size;
3696 
3697       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3698       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3699       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3700       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3701       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3702       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3703       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3704       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3705       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3706     } else {
3707       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3708       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3709       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3710     }
3711     if (!pcbddc->user_ChangeOfBasisMatrix) {
3712       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3713       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3714     } else {
3715       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3716       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3717     }
3718     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3719     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3720     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3721     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3722   }
3723 
3724   /* add pressure dofs to set of primal nodes for numbering purposes */
3725   for (i=0;i<pcbddc->benign_n;i++) {
3726     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
3727     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
3728     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3729     pcbddc->local_primal_size_cc++;
3730     pcbddc->local_primal_size++;
3731   }
3732 
3733   /* check if a new primal space has been introduced (also take into account benign trick) */
3734   pcbddc->new_primal_space_local = PETSC_TRUE;
3735   if (olocal_primal_size == pcbddc->local_primal_size) {
3736     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3737     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3738     if (!pcbddc->new_primal_space_local) {
3739       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3740       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3741     }
3742   }
3743   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3744   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3745   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3746 
3747   /* flush dbg viewer */
3748   if (pcbddc->dbg_flag) {
3749     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3750   }
3751 
3752   /* free workspace */
3753   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3754   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3755   if (!pcbddc->adaptive_selection) {
3756     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3757     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3758   } else {
3759     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3760                       pcbddc->adaptive_constraints_idxs_ptr,
3761                       pcbddc->adaptive_constraints_data_ptr,
3762                       pcbddc->adaptive_constraints_idxs,
3763                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3764     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3765     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3766   }
3767   PetscFunctionReturn(0);
3768 }
3769 
3770 #undef __FUNCT__
3771 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3772 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3773 {
3774   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3775   PC_IS       *pcis = (PC_IS*)pc->data;
3776   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3777   PetscInt    ierr,i,vertex_size,N;
3778   PetscViewer viewer=pcbddc->dbg_viewer;
3779 
3780   PetscFunctionBegin;
3781   /* Reset previously computed graph */
3782   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3783   /* Init local Graph struct */
3784   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3785   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3786 
3787   /* Check validity of the csr graph passed in by the user */
3788   if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3789     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
3790   }
3791 
3792   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3793   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3794     PetscInt  *xadj,*adjncy;
3795     PetscInt  nvtxs;
3796     PetscBool flg_row=PETSC_FALSE;
3797 
3798     if (pcbddc->use_local_adj) {
3799 
3800       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3801       if (flg_row) {
3802         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3803         pcbddc->computed_rowadj = PETSC_TRUE;
3804       }
3805       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3806     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3807       IS                     is_dummy;
3808       ISLocalToGlobalMapping l2gmap_dummy;
3809       PetscInt               j,sum;
3810       PetscInt               *cxadj,*cadjncy;
3811       const PetscInt         *idxs;
3812       PCBDDCGraph            graph;
3813       PetscBT                is_on_boundary;
3814 
3815       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3816       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3817       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3818       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3819       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3820       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3821       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3822       if (flg_row) {
3823         graph->xadj = xadj;
3824         graph->adjncy = adjncy;
3825       }
3826       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3827       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3828       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3829 
3830       if (pcbddc->dbg_flag) {
3831         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3832         for (i=0;i<graph->ncc;i++) {
3833           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3834         }
3835       }
3836 
3837       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3838       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3839       for (i=0;i<pcis->n_B;i++) {
3840         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3841       }
3842       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3843 
3844       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3845       sum = 0;
3846       for (i=0;i<graph->ncc;i++) {
3847         PetscInt sizecc = 0;
3848         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3849           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3850             sizecc++;
3851           }
3852         }
3853         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3854           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3855             cxadj[graph->queue[j]] = sizecc;
3856           }
3857         }
3858         sum += sizecc*sizecc;
3859       }
3860       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3861       sum = 0;
3862       for (i=0;i<pcis->n;i++) {
3863         PetscInt temp = cxadj[i];
3864         cxadj[i] = sum;
3865         sum += temp;
3866       }
3867       cxadj[pcis->n] = sum;
3868       for (i=0;i<graph->ncc;i++) {
3869         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3870           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3871             PetscInt k,sizecc = 0;
3872             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3873               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3874                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3875                 sizecc++;
3876               }
3877             }
3878           }
3879         }
3880       }
3881       if (sum) {
3882         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3883       } else {
3884         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3885         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3886       }
3887       graph->xadj = 0;
3888       graph->adjncy = 0;
3889       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3890       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3891     }
3892   }
3893   if (pcbddc->dbg_flag) {
3894     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3895   }
3896 
3897   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3898   vertex_size = 1;
3899   if (pcbddc->user_provided_isfordofs) {
3900     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3901       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3902       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3903         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3904         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3905       }
3906       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3907       pcbddc->n_ISForDofs = 0;
3908       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3909     }
3910     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3911     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3912   } else {
3913     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3914       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3915       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3916       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3917         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3918       }
3919     }
3920   }
3921 
3922   /* Setup of Graph */
3923   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3924     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3925   }
3926   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3927     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3928   }
3929   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */
3930     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
3931   }
3932   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
3933 
3934   /* attach info on disconnected subdomains if present */
3935   if (pcbddc->n_local_subs) {
3936     PetscInt *local_subs;
3937 
3938     ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr);
3939     for (i=0;i<pcbddc->n_local_subs;i++) {
3940       const PetscInt *idxs;
3941       PetscInt       nl,j;
3942 
3943       ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr);
3944       ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3945       for (j=0;j<nl;j++) {
3946         local_subs[idxs[j]] = i;
3947       }
3948       ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3949     }
3950     pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
3951     pcbddc->mat_graph->local_subs = local_subs;
3952   }
3953 
3954   /* Graph's connected components analysis */
3955   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3956 
3957   /* print some info to stdout */
3958   if (pcbddc->dbg_flag) {
3959     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3960   }
3961 
3962   /* mark topography has done */
3963   pcbddc->recompute_topography = PETSC_FALSE;
3964   PetscFunctionReturn(0);
3965 }
3966 
3967 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3968 #undef __FUNCT__
3969 #define __FUNCT__ "PCBDDCSubsetNumbering"
3970 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3971 {
3972   PetscSF        sf;
3973   PetscLayout    map;
3974   const PetscInt *idxs;
3975   PetscInt       *leaf_data,*root_data,*gidxs;
3976   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3977   PetscInt       n_n,nlocals,start,first_index;
3978   PetscMPIInt    commsize;
3979   PetscBool      first_found;
3980   PetscErrorCode ierr;
3981 
3982   PetscFunctionBegin;
3983   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3984   if (subset_mult) {
3985     PetscCheckSameComm(subset,1,subset_mult,2);
3986     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3987     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3988   }
3989   /* create workspace layout for computing global indices of subset */
3990   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3991   lbounds[0] = lbounds[1] = 0;
3992   for (i=0;i<n;i++) {
3993     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3994     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3995   }
3996   lbounds[0] = -lbounds[0];
3997   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3998   gbounds[0] = -gbounds[0];
3999   N = gbounds[1] - gbounds[0] + 1;
4000   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
4001   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
4002   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
4003   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
4004   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
4005 
4006   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
4007   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
4008   if (subset_mult) {
4009     const PetscInt* idxs_mult;
4010 
4011     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4012     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
4013     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4014   } else {
4015     for (i=0;i<n;i++) leaf_data[i] = 1;
4016   }
4017   /* local size of new subset */
4018   n_n = 0;
4019   for (i=0;i<n;i++) n_n += leaf_data[i];
4020 
4021   /* global indexes in layout */
4022   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
4023   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
4024   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
4025   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
4026   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
4027   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
4028 
4029   /* reduce from leaves to roots */
4030   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
4031   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4032   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4033 
4034   /* count indexes in local part of layout */
4035   nlocals = 0;
4036   first_index = -1;
4037   first_found = PETSC_FALSE;
4038   for (i=0;i<Nl;i++) {
4039     if (!first_found && root_data[i]) {
4040       first_found = PETSC_TRUE;
4041       first_index = i;
4042     }
4043     nlocals += root_data[i];
4044   }
4045 
4046   /* cumulative of number of indexes and size of subset without holes */
4047 #if defined(PETSC_HAVE_MPI_EXSCAN)
4048   start = 0;
4049   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4050 #else
4051   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4052   start = start-nlocals;
4053 #endif
4054 
4055   if (N_n) { /* compute total size of new subset if requested */
4056     *N_n = start + nlocals;
4057     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
4058     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4059   }
4060 
4061   /* adapt root data with cumulative */
4062   if (first_found) {
4063     PetscInt old_index;
4064 
4065     root_data[first_index] += start;
4066     old_index = first_index;
4067     for (i=first_index+1;i<Nl;i++) {
4068       if (root_data[i]) {
4069         root_data[i] += root_data[old_index];
4070         old_index = i;
4071       }
4072     }
4073   }
4074 
4075   /* from roots to leaves */
4076   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4077   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4078   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
4079 
4080   /* create new IS with global indexes without holes */
4081   if (subset_mult) {
4082     const PetscInt* idxs_mult;
4083     PetscInt        cum;
4084 
4085     cum = 0;
4086     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4087     for (i=0;i<n;i++) {
4088       PetscInt j;
4089       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
4090     }
4091     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4092   } else {
4093     for (i=0;i<n;i++) {
4094       gidxs[i] = leaf_data[i]-1;
4095     }
4096   }
4097   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
4098   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
4099   PetscFunctionReturn(0);
4100 }
4101 
4102 #undef __FUNCT__
4103 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
4104 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
4105 {
4106   PetscInt       i,j;
4107   PetscScalar    *alphas;
4108   PetscErrorCode ierr;
4109 
4110   PetscFunctionBegin;
4111   /* this implements stabilized Gram-Schmidt */
4112   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
4113   for (i=0;i<n;i++) {
4114     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
4115     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
4116     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
4117   }
4118   ierr = PetscFree(alphas);CHKERRQ(ierr);
4119   PetscFunctionReturn(0);
4120 }
4121 
4122 #undef __FUNCT__
4123 #define __FUNCT__ "MatISGetSubassemblingPattern"
4124 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
4125 {
4126   IS             ranks_send_to;
4127   PetscInt       n_neighs,*neighs,*n_shared,**shared;
4128   PetscMPIInt    size,rank,color;
4129   PetscInt       *xadj,*adjncy;
4130   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
4131   PetscInt       i,local_size,threshold=0;
4132   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
4133   PetscSubcomm   subcomm;
4134   PetscErrorCode ierr;
4135 
4136   PetscFunctionBegin;
4137   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
4138   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
4139   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
4140 
4141   /* Get info on mapping */
4142   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
4143   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4144 
4145   /* build local CSR graph of subdomains' connectivity */
4146   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
4147   xadj[0] = 0;
4148   xadj[1] = PetscMax(n_neighs-1,0);
4149   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
4150   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
4151 
4152   if (threshold) {
4153     PetscInt xadj_count = 0;
4154     for (i=1;i<n_neighs;i++) {
4155       if (n_shared[i] > threshold) {
4156         adjncy[xadj_count] = neighs[i];
4157         adjncy_wgt[xadj_count] = n_shared[i];
4158         xadj_count++;
4159       }
4160     }
4161     xadj[1] = xadj_count;
4162   } else {
4163     if (xadj[1]) {
4164       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
4165       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
4166     }
4167   }
4168   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4169   if (use_square) {
4170     for (i=0;i<xadj[1];i++) {
4171       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
4172     }
4173   }
4174   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4175 
4176   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
4177 
4178   /*
4179     Restrict work on active processes only.
4180   */
4181   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
4182   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
4183   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
4184   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
4185   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4186   if (color) {
4187     ierr = PetscFree(xadj);CHKERRQ(ierr);
4188     ierr = PetscFree(adjncy);CHKERRQ(ierr);
4189     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4190   } else {
4191     Mat             subdomain_adj;
4192     IS              new_ranks,new_ranks_contig;
4193     MatPartitioning partitioner;
4194     PetscInt        prank,rstart=0,rend=0;
4195     PetscInt        *is_indices,*oldranks;
4196     PetscBool       aggregate;
4197 
4198     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
4199     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
4200     prank = rank;
4201     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
4202     /*
4203     for (i=0;i<size;i++) {
4204       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
4205     }
4206     */
4207     for (i=0;i<xadj[1];i++) {
4208       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
4209     }
4210     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4211     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
4212     if (aggregate) {
4213       PetscInt    lrows,row,ncols,*cols;
4214       PetscMPIInt nrank;
4215       PetscScalar *vals;
4216 
4217       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
4218       lrows = 0;
4219       if (nrank<redprocs) {
4220         lrows = size/redprocs;
4221         if (nrank<size%redprocs) lrows++;
4222       }
4223       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
4224       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
4225       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4226       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4227       row = nrank;
4228       ncols = xadj[1]-xadj[0];
4229       cols = adjncy;
4230       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
4231       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
4232       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4233       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4234       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4235       ierr = PetscFree(xadj);CHKERRQ(ierr);
4236       ierr = PetscFree(adjncy);CHKERRQ(ierr);
4237       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4238       ierr = PetscFree(vals);CHKERRQ(ierr);
4239     } else {
4240       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
4241     }
4242     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
4243 
4244     /* Partition */
4245     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
4246     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
4247     if (use_vwgt) {
4248       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
4249       v_wgt[0] = local_size;
4250       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
4251     }
4252     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
4253     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
4254     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
4255     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
4256     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
4257 
4258     /* renumber new_ranks to avoid "holes" in new set of processors */
4259     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
4260     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
4261     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4262     if (!redprocs) {
4263       ranks_send_to_idx[0] = oldranks[is_indices[0]];
4264     } else {
4265       PetscInt    idxs[1];
4266       PetscMPIInt tag;
4267       MPI_Request *reqs;
4268 
4269       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
4270       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
4271       for (i=rstart;i<rend;i++) {
4272         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
4273       }
4274       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
4275       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4276       ierr = PetscFree(reqs);CHKERRQ(ierr);
4277       ranks_send_to_idx[0] = oldranks[idxs[0]];
4278     }
4279     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4280     /* clean up */
4281     ierr = PetscFree(oldranks);CHKERRQ(ierr);
4282     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
4283     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
4284     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
4285   }
4286   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4287 
4288   /* assemble parallel IS for sends */
4289   i = 1;
4290   if (color) i=0;
4291   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
4292   /* get back IS */
4293   *is_sends = ranks_send_to;
4294   PetscFunctionReturn(0);
4295 }
4296 
4297 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
4298 
4299 #undef __FUNCT__
4300 #define __FUNCT__ "MatISSubassemble"
4301 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
4302 {
4303   Mat                    local_mat;
4304   IS                     is_sends_internal;
4305   PetscInt               rows,cols,new_local_rows;
4306   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
4307   PetscBool              ismatis,isdense,newisdense,destroy_mat;
4308   ISLocalToGlobalMapping l2gmap;
4309   PetscInt*              l2gmap_indices;
4310   const PetscInt*        is_indices;
4311   MatType                new_local_type;
4312   /* buffers */
4313   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
4314   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
4315   PetscInt               *recv_buffer_idxs_local;
4316   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
4317   /* MPI */
4318   MPI_Comm               comm,comm_n;
4319   PetscSubcomm           subcomm;
4320   PetscMPIInt            n_sends,n_recvs,commsize;
4321   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
4322   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
4323   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
4324   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
4325   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
4326   PetscErrorCode         ierr;
4327 
4328   PetscFunctionBegin;
4329   /* TODO: add missing checks */
4330   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
4331   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
4332   PetscValidLogicalCollectiveEnum(mat,reuse,5);
4333   PetscValidLogicalCollectiveInt(mat,nis,7);
4334   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
4335   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
4336   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4337   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
4338   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
4339   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
4340   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
4341   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
4342     PetscInt mrows,mcols,mnrows,mncols;
4343     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
4344     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
4345     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
4346     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
4347     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
4348     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
4349   }
4350   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
4351   PetscValidLogicalCollectiveInt(mat,bs,0);
4352   /* prepare IS for sending if not provided */
4353   if (!is_sends) {
4354     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
4355     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
4356   } else {
4357     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
4358     is_sends_internal = is_sends;
4359   }
4360 
4361   /* get comm */
4362   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
4363 
4364   /* compute number of sends */
4365   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
4366   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
4367 
4368   /* compute number of receives */
4369   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
4370   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
4371   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
4372   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4373   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
4374   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
4375   ierr = PetscFree(iflags);CHKERRQ(ierr);
4376 
4377   /* restrict comm if requested */
4378   subcomm = 0;
4379   destroy_mat = PETSC_FALSE;
4380   if (restrict_comm) {
4381     PetscMPIInt color,subcommsize;
4382 
4383     color = 0;
4384     if (restrict_full) {
4385       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
4386     } else {
4387       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
4388     }
4389     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
4390     subcommsize = commsize - subcommsize;
4391     /* check if reuse has been requested */
4392     if (reuse == MAT_REUSE_MATRIX) {
4393       if (*mat_n) {
4394         PetscMPIInt subcommsize2;
4395         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
4396         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
4397         comm_n = PetscObjectComm((PetscObject)*mat_n);
4398       } else {
4399         comm_n = PETSC_COMM_SELF;
4400       }
4401     } else { /* MAT_INITIAL_MATRIX */
4402       PetscMPIInt rank;
4403 
4404       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4405       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4406       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4407       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4408       comm_n = PetscSubcommChild(subcomm);
4409     }
4410     /* flag to destroy *mat_n if not significative */
4411     if (color) destroy_mat = PETSC_TRUE;
4412   } else {
4413     comm_n = comm;
4414   }
4415 
4416   /* prepare send/receive buffers */
4417   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4418   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4419   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4420   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4421   if (nis) {
4422     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4423   }
4424 
4425   /* Get data from local matrices */
4426   if (!isdense) {
4427     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4428     /* TODO: See below some guidelines on how to prepare the local buffers */
4429     /*
4430        send_buffer_vals should contain the raw values of the local matrix
4431        send_buffer_idxs should contain:
4432        - MatType_PRIVATE type
4433        - PetscInt        size_of_l2gmap
4434        - PetscInt        global_row_indices[size_of_l2gmap]
4435        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4436     */
4437   } else {
4438     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4439     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4440     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4441     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4442     send_buffer_idxs[1] = i;
4443     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4444     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4445     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4446     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4447     for (i=0;i<n_sends;i++) {
4448       ilengths_vals[is_indices[i]] = len*len;
4449       ilengths_idxs[is_indices[i]] = len+2;
4450     }
4451   }
4452   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4453   /* additional is (if any) */
4454   if (nis) {
4455     PetscMPIInt psum;
4456     PetscInt j;
4457     for (j=0,psum=0;j<nis;j++) {
4458       PetscInt plen;
4459       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4460       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4461       psum += len+1; /* indices + lenght */
4462     }
4463     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4464     for (j=0,psum=0;j<nis;j++) {
4465       PetscInt plen;
4466       const PetscInt *is_array_idxs;
4467       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4468       send_buffer_idxs_is[psum] = plen;
4469       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4470       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4471       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4472       psum += plen+1; /* indices + lenght */
4473     }
4474     for (i=0;i<n_sends;i++) {
4475       ilengths_idxs_is[is_indices[i]] = psum;
4476     }
4477     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4478   }
4479 
4480   buf_size_idxs = 0;
4481   buf_size_vals = 0;
4482   buf_size_idxs_is = 0;
4483   for (i=0;i<n_recvs;i++) {
4484     buf_size_idxs += (PetscInt)olengths_idxs[i];
4485     buf_size_vals += (PetscInt)olengths_vals[i];
4486     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4487   }
4488   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4489   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4490   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4491 
4492   /* get new tags for clean communications */
4493   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4494   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4495   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4496 
4497   /* allocate for requests */
4498   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4499   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4500   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4501   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4502   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4503   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4504 
4505   /* communications */
4506   ptr_idxs = recv_buffer_idxs;
4507   ptr_vals = recv_buffer_vals;
4508   ptr_idxs_is = recv_buffer_idxs_is;
4509   for (i=0;i<n_recvs;i++) {
4510     source_dest = onodes[i];
4511     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4512     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4513     ptr_idxs += olengths_idxs[i];
4514     ptr_vals += olengths_vals[i];
4515     if (nis) {
4516       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4517       ptr_idxs_is += olengths_idxs_is[i];
4518     }
4519   }
4520   for (i=0;i<n_sends;i++) {
4521     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4522     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4523     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4524     if (nis) {
4525       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4526     }
4527   }
4528   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4529   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4530 
4531   /* assemble new l2g map */
4532   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4533   ptr_idxs = recv_buffer_idxs;
4534   new_local_rows = 0;
4535   for (i=0;i<n_recvs;i++) {
4536     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4537     ptr_idxs += olengths_idxs[i];
4538   }
4539   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4540   ptr_idxs = recv_buffer_idxs;
4541   new_local_rows = 0;
4542   for (i=0;i<n_recvs;i++) {
4543     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4544     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4545     ptr_idxs += olengths_idxs[i];
4546   }
4547   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4548   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4549   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4550 
4551   /* infer new local matrix type from received local matrices type */
4552   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4553   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4554   if (n_recvs) {
4555     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4556     ptr_idxs = recv_buffer_idxs;
4557     for (i=0;i<n_recvs;i++) {
4558       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4559         new_local_type_private = MATAIJ_PRIVATE;
4560         break;
4561       }
4562       ptr_idxs += olengths_idxs[i];
4563     }
4564     switch (new_local_type_private) {
4565       case MATDENSE_PRIVATE:
4566         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4567           new_local_type = MATSEQAIJ;
4568           bs = 1;
4569         } else { /* if I receive only 1 dense matrix */
4570           new_local_type = MATSEQDENSE;
4571           bs = 1;
4572         }
4573         break;
4574       case MATAIJ_PRIVATE:
4575         new_local_type = MATSEQAIJ;
4576         bs = 1;
4577         break;
4578       case MATBAIJ_PRIVATE:
4579         new_local_type = MATSEQBAIJ;
4580         break;
4581       case MATSBAIJ_PRIVATE:
4582         new_local_type = MATSEQSBAIJ;
4583         break;
4584       default:
4585         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4586         break;
4587     }
4588   } else { /* by default, new_local_type is seqdense */
4589     new_local_type = MATSEQDENSE;
4590     bs = 1;
4591   }
4592 
4593   /* create MATIS object if needed */
4594   if (reuse == MAT_INITIAL_MATRIX) {
4595     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4596     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4597   } else {
4598     /* it also destroys the local matrices */
4599     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4600   }
4601   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4602   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4603 
4604   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4605 
4606   /* Global to local map of received indices */
4607   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4608   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4609   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4610 
4611   /* restore attributes -> type of incoming data and its size */
4612   buf_size_idxs = 0;
4613   for (i=0;i<n_recvs;i++) {
4614     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4615     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4616     buf_size_idxs += (PetscInt)olengths_idxs[i];
4617   }
4618   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4619 
4620   /* set preallocation */
4621   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4622   if (!newisdense) {
4623     PetscInt *new_local_nnz=0;
4624 
4625     ptr_vals = recv_buffer_vals;
4626     ptr_idxs = recv_buffer_idxs_local;
4627     if (n_recvs) {
4628       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4629     }
4630     for (i=0;i<n_recvs;i++) {
4631       PetscInt j;
4632       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4633         for (j=0;j<*(ptr_idxs+1);j++) {
4634           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4635         }
4636       } else {
4637         /* TODO */
4638       }
4639       ptr_idxs += olengths_idxs[i];
4640     }
4641     if (new_local_nnz) {
4642       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4643       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4644       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4645       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4646       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4647       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4648     } else {
4649       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4650     }
4651     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4652   } else {
4653     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4654   }
4655 
4656   /* set values */
4657   ptr_vals = recv_buffer_vals;
4658   ptr_idxs = recv_buffer_idxs_local;
4659   for (i=0;i<n_recvs;i++) {
4660     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4661       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4662       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4663       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4664       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4665       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4666     } else {
4667       /* TODO */
4668     }
4669     ptr_idxs += olengths_idxs[i];
4670     ptr_vals += olengths_vals[i];
4671   }
4672   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4673   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4674   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4675   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4676   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4677   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4678 
4679 #if 0
4680   if (!restrict_comm) { /* check */
4681     Vec       lvec,rvec;
4682     PetscReal infty_error;
4683 
4684     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4685     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4686     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4687     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4688     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4689     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4690     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4691     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4692     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4693   }
4694 #endif
4695 
4696   /* assemble new additional is (if any) */
4697   if (nis) {
4698     PetscInt **temp_idxs,*count_is,j,psum;
4699 
4700     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4701     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4702     ptr_idxs = recv_buffer_idxs_is;
4703     psum = 0;
4704     for (i=0;i<n_recvs;i++) {
4705       for (j=0;j<nis;j++) {
4706         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4707         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4708         psum += plen;
4709         ptr_idxs += plen+1; /* shift pointer to received data */
4710       }
4711     }
4712     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4713     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4714     for (i=1;i<nis;i++) {
4715       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4716     }
4717     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4718     ptr_idxs = recv_buffer_idxs_is;
4719     for (i=0;i<n_recvs;i++) {
4720       for (j=0;j<nis;j++) {
4721         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4722         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4723         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4724         ptr_idxs += plen+1; /* shift pointer to received data */
4725       }
4726     }
4727     for (i=0;i<nis;i++) {
4728       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4729       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4730       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4731     }
4732     ierr = PetscFree(count_is);CHKERRQ(ierr);
4733     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4734     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4735   }
4736   /* free workspace */
4737   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4738   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4739   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4740   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4741   if (isdense) {
4742     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4743     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4744   } else {
4745     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4746   }
4747   if (nis) {
4748     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4749     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4750   }
4751   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4752   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4753   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4754   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4755   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4756   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4757   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4758   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4759   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4760   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4761   ierr = PetscFree(onodes);CHKERRQ(ierr);
4762   if (nis) {
4763     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4764     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4765     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4766   }
4767   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4768   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4769     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4770     for (i=0;i<nis;i++) {
4771       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4772     }
4773     *mat_n = NULL;
4774   }
4775   PetscFunctionReturn(0);
4776 }
4777 
4778 /* temporary hack into ksp private data structure */
4779 #include <petsc/private/kspimpl.h>
4780 
4781 #undef __FUNCT__
4782 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4783 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4784 {
4785   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4786   PC_IS                  *pcis = (PC_IS*)pc->data;
4787   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4788   MatNullSpace           CoarseNullSpace=NULL;
4789   ISLocalToGlobalMapping coarse_islg;
4790   IS                     coarse_is,*isarray;
4791   PetscInt               i,im_active=-1,active_procs=-1;
4792   PetscInt               nis,nisdofs,nisneu,nisvert;
4793   PC                     pc_temp;
4794   PCType                 coarse_pc_type;
4795   KSPType                coarse_ksp_type;
4796   PetscBool              multilevel_requested,multilevel_allowed;
4797   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4798   Mat                    t_coarse_mat_is;
4799   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4800   PetscMPIInt            all_procs;
4801   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4802   PetscBool              compute_vecs = PETSC_FALSE;
4803   PetscScalar            *array;
4804   PetscErrorCode         ierr;
4805 
4806   PetscFunctionBegin;
4807   /* Assign global numbering to coarse dofs */
4808   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4809     PetscInt ocoarse_size;
4810     compute_vecs = PETSC_TRUE;
4811     ocoarse_size = pcbddc->coarse_size;
4812     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4813     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4814     /* see if we can avoid some work */
4815     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4816       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4817       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4818         PC        pc;
4819         PetscBool isbddc;
4820 
4821         /* temporary workaround since PCBDDC does not have a reset method so far */
4822         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4823         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4824         if (isbddc) {
4825           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4826         }
4827         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4828         coarse_reuse = PETSC_FALSE;
4829       } else { /* we can safely reuse already computed coarse matrix */
4830         coarse_reuse = PETSC_TRUE;
4831       }
4832     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4833       coarse_reuse = PETSC_FALSE;
4834     }
4835     /* reset any subassembling information */
4836     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4837     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4838   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4839     coarse_reuse = PETSC_TRUE;
4840   }
4841 
4842   /* count "active" (i.e. with positive local size) and "void" processes */
4843   im_active = !!(pcis->n);
4844   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4845   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4846   void_procs = all_procs-active_procs;
4847   csin_type_simple = PETSC_TRUE;
4848   redist = PETSC_FALSE;
4849   if (pcbddc->current_level && void_procs) {
4850     csin_ml = PETSC_TRUE;
4851     ncoarse_ml = void_procs;
4852     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4853     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4854       csin_ds = PETSC_TRUE;
4855       ncoarse_ds = pcbddc->redistribute_coarse;
4856       redist = PETSC_TRUE;
4857     } else {
4858       csin_ds = PETSC_TRUE;
4859       ncoarse_ds = active_procs;
4860       redist = PETSC_TRUE;
4861     }
4862   } else {
4863     csin_ml = PETSC_FALSE;
4864     ncoarse_ml = all_procs;
4865     if (void_procs) {
4866       csin_ds = PETSC_TRUE;
4867       ncoarse_ds = void_procs;
4868       csin_type_simple = PETSC_FALSE;
4869     } else {
4870       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4871         csin_ds = PETSC_TRUE;
4872         ncoarse_ds = pcbddc->redistribute_coarse;
4873         redist = PETSC_TRUE;
4874       } else {
4875         csin_ds = PETSC_FALSE;
4876         ncoarse_ds = all_procs;
4877       }
4878     }
4879   }
4880 
4881   /*
4882     test if we can go multilevel: three conditions must be satisfied:
4883     - we have not exceeded the number of levels requested
4884     - we can actually subassemble the active processes
4885     - we can find a suitable number of MPI processes where we can place the subassembled problem
4886   */
4887   multilevel_allowed = PETSC_FALSE;
4888   multilevel_requested = PETSC_FALSE;
4889   if (pcbddc->current_level < pcbddc->max_levels) {
4890     multilevel_requested = PETSC_TRUE;
4891     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4892       multilevel_allowed = PETSC_FALSE;
4893     } else {
4894       multilevel_allowed = PETSC_TRUE;
4895     }
4896   }
4897   /* determine number of process partecipating to coarse solver */
4898   if (multilevel_allowed) {
4899     ncoarse = ncoarse_ml;
4900     csin = csin_ml;
4901     redist = PETSC_FALSE;
4902   } else {
4903     ncoarse = ncoarse_ds;
4904     csin = csin_ds;
4905   }
4906 
4907   /* creates temporary l2gmap and IS for coarse indexes */
4908   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4909   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4910 
4911   /* creates temporary MATIS object for coarse matrix */
4912   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4913   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4914   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4915   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4916   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4917   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4918   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4919   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4920   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4921 
4922   /* compute dofs splitting and neumann boundaries for coarse dofs */
4923   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */
4924     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4925     const PetscInt         *idxs;
4926     ISLocalToGlobalMapping tmap;
4927 
4928     /* create map between primal indices (in local representative ordering) and local primal numbering */
4929     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4930     /* allocate space for temporary storage */
4931     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4932     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4933     /* allocate for IS array */
4934     nisdofs = pcbddc->n_ISForDofsLocal;
4935     nisneu = !!pcbddc->NeumannBoundariesLocal;
4936     nisvert = 0;
4937     if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) {
4938       nisvert = 1;
4939     }
4940     nis = nisdofs + nisneu + nisvert;
4941     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4942     /* dofs splitting */
4943     for (i=0;i<nisdofs;i++) {
4944       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4945       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4946       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4947       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4948       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4949       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4950       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4951       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4952     }
4953     /* neumann boundaries */
4954     if (pcbddc->NeumannBoundariesLocal) {
4955       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4956       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4957       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4958       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4959       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4960       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4961       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4962       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4963     }
4964     /* primal vertices (benign) */
4965     if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) {
4966       ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr);
4967       ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr);
4968       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4969       ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr);
4970       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4971       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr);
4972     }
4973     /* free memory */
4974     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4975     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4976     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4977   } else {
4978     nis = 0;
4979     nisdofs = 0;
4980     nisneu = 0;
4981     nisvert = 0;
4982     isarray = NULL;
4983   }
4984   /* destroy no longer needed map */
4985   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4986 
4987   /* restrict on coarse candidates (if needed) */
4988   coarse_mat_is = NULL;
4989   if (csin) {
4990     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4991       if (redist) {
4992         PetscMPIInt rank;
4993         PetscInt    spc,n_spc_p1,dest[1],destsize;
4994 
4995         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4996         spc = active_procs/ncoarse;
4997         n_spc_p1 = active_procs%ncoarse;
4998         if (im_active) {
4999           destsize = 1;
5000           if (rank > n_spc_p1*(spc+1)-1) {
5001             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
5002           } else {
5003             dest[0] = rank/(spc+1);
5004           }
5005         } else {
5006           destsize = 0;
5007         }
5008         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5009       } else if (csin_type_simple) {
5010         PetscMPIInt rank;
5011         PetscInt    issize,isidx;
5012 
5013         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
5014         if (im_active) {
5015           issize = 1;
5016           isidx = (PetscInt)rank;
5017         } else {
5018           issize = 0;
5019           isidx = -1;
5020         }
5021         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5022       } else { /* get a suitable subassembling pattern from MATIS code */
5023         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5024       }
5025 
5026       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
5027       if (!redist || ncoarse <= void_procs) {
5028         PetscInt ncoarse_cand,tissize,*nisindices;
5029         PetscInt *coarse_candidates;
5030         const PetscInt* tisindices;
5031 
5032         /* get coarse candidates' ranks in pc communicator */
5033         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
5034         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5035         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
5036           if (!coarse_candidates[i]) {
5037             coarse_candidates[ncoarse_cand++]=i;
5038           }
5039         }
5040         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
5041 
5042 
5043         if (pcbddc->dbg_flag) {
5044           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5045           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
5046           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5047           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
5048           for (i=0;i<ncoarse_cand;i++) {
5049             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
5050           }
5051           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
5052           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5053         }
5054         /* shift the pattern on coarse candidates */
5055         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
5056         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5057         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
5058         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
5059         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5060         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
5061         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
5062       }
5063       if (pcbddc->dbg_flag) {
5064         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5065         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
5066         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5067         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5068       }
5069     }
5070     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
5071     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
5072       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5073     } else { /* this is the last level, so use just receiving processes in subcomm */
5074       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5075     }
5076   } else {
5077     if (pcbddc->dbg_flag) {
5078       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5079       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
5080       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5081     }
5082     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
5083     coarse_mat_is = t_coarse_mat_is;
5084   }
5085 
5086   /* create local to global scatters for coarse problem */
5087   if (compute_vecs) {
5088     PetscInt lrows;
5089     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
5090     if (coarse_mat_is) {
5091       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
5092     } else {
5093       lrows = 0;
5094     }
5095     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
5096     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
5097     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
5098     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5099     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5100   }
5101   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
5102   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
5103 
5104   /* set defaults for coarse KSP and PC */
5105   if (multilevel_allowed) {
5106     coarse_ksp_type = KSPRICHARDSON;
5107     coarse_pc_type = PCBDDC;
5108   } else {
5109     coarse_ksp_type = KSPPREONLY;
5110     coarse_pc_type = PCREDUNDANT;
5111   }
5112 
5113   /* print some info if requested */
5114   if (pcbddc->dbg_flag) {
5115     if (!multilevel_allowed) {
5116       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5117       if (multilevel_requested) {
5118         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5119       } else if (pcbddc->max_levels) {
5120         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
5121       }
5122       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5123     }
5124   }
5125 
5126   /* create the coarse KSP object only once with defaults */
5127   if (coarse_mat_is) {
5128     MatReuse coarse_mat_reuse;
5129     PetscViewer dbg_viewer = NULL;
5130     if (pcbddc->dbg_flag) {
5131       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
5132       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5133     }
5134     if (!pcbddc->coarse_ksp) {
5135       char prefix[256],str_level[16];
5136       size_t len;
5137       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
5138       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5139       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
5140       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
5141       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
5142       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
5143       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
5144       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5145       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5146       /* prefix */
5147       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
5148       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
5149       if (!pcbddc->current_level) {
5150         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
5151         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
5152       } else {
5153         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
5154         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5155         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5156         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
5157         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
5158         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
5159       }
5160       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
5161       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5162       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
5163       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5164       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
5165       /* allow user customization */
5166       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
5167     }
5168     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5169     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5170     if (nisdofs) {
5171       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
5172       for (i=0;i<nisdofs;i++) {
5173         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
5174       }
5175     }
5176     if (nisneu) {
5177       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
5178       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
5179     }
5180     if (nisvert) {
5181       ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr);
5182       ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr);
5183     }
5184 
5185     /* get some info after set from options */
5186     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
5187     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
5188     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
5189     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
5190       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5191       isbddc = PETSC_FALSE;
5192     }
5193     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
5194     if (isredundant) {
5195       KSP inner_ksp;
5196       PC  inner_pc;
5197       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
5198       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
5199       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
5200     }
5201 
5202     /* assemble coarse matrix */
5203     if (coarse_reuse) {
5204       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
5205       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
5206       coarse_mat_reuse = MAT_REUSE_MATRIX;
5207     } else {
5208       coarse_mat_reuse = MAT_INITIAL_MATRIX;
5209     }
5210     if (isbddc || isnn) {
5211       if (pcbddc->coarsening_ratio > 1) {
5212         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
5213           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
5214           if (pcbddc->dbg_flag) {
5215             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5216             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
5217             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
5218             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5219           }
5220         }
5221         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
5222       } else {
5223         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
5224         coarse_mat = coarse_mat_is;
5225       }
5226     } else {
5227       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
5228     }
5229     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
5230 
5231     /* propagate symmetry info of coarse matrix */
5232     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
5233     if (pc->pmat->symmetric_set) {
5234       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
5235     }
5236     if (pc->pmat->hermitian_set) {
5237       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
5238     }
5239     if (pc->pmat->spd_set) {
5240       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
5241     }
5242     /* set operators */
5243     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5244     if (pcbddc->dbg_flag) {
5245       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5246     }
5247   } else { /* processes non partecipating to coarse solver (if any) */
5248     coarse_mat = 0;
5249   }
5250   ierr = PetscFree(isarray);CHKERRQ(ierr);
5251 #if 0
5252   {
5253     PetscViewer viewer;
5254     char filename[256];
5255     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
5256     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
5257     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
5258     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
5259     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
5260   }
5261 #endif
5262 
5263   /* Compute coarse null space (special handling by BDDC only) */
5264 #if 0
5265   if (pcbddc->NullSpace) {
5266     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
5267   }
5268 #endif
5269   /* hack */
5270   if (pcbddc->coarse_ksp) {
5271     Vec crhs,csol;
5272 
5273     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
5274     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
5275     if (!csol) {
5276       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
5277     }
5278     if (!crhs) {
5279       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
5280     }
5281   }
5282 
5283   /* compute null space for coarse solver if the benign trick has been requested */
5284   if (pcbddc->benign_null) {
5285 
5286     ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr);
5287     for (i=0;i<pcbddc->benign_n;i++) {
5288       ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr);
5289     }
5290     ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
5291     ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
5292     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5293     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5294     if (coarse_mat) {
5295       Vec         nullv;
5296       PetscScalar *array,*array2;
5297       PetscInt    nl;
5298 
5299       ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr);
5300       ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr);
5301       ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5302       ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr);
5303       ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr);
5304       ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr);
5305       ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5306       ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr);
5307       ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr);
5308       ierr = VecDestroy(&nullv);CHKERRQ(ierr);
5309     }
5310   }
5311 
5312   if (pcbddc->coarse_ksp) {
5313     PetscBool ispreonly;
5314 
5315     if (CoarseNullSpace) {
5316       PetscBool isnull;
5317       ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr);
5318       if (isnull) {
5319         if (isbddc && !pcbddc->benign_saddle_point) {
5320           ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
5321         } else {
5322           ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
5323         }
5324       } else {
5325         ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5326       }
5327     }
5328     /* setup coarse ksp */
5329     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
5330     /* Check coarse problem if in debug mode or if solving with an iterative method */
5331     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
5332     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
5333       KSP       check_ksp;
5334       KSPType   check_ksp_type;
5335       PC        check_pc;
5336       Vec       check_vec,coarse_vec;
5337       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
5338       PetscInt  its;
5339       PetscBool compute_eigs;
5340       PetscReal *eigs_r,*eigs_c;
5341       PetscInt  neigs;
5342       const char *prefix;
5343 
5344       /* Create ksp object suitable for estimation of extreme eigenvalues */
5345       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
5346       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5347       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5348       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
5349       if (ispreonly) {
5350         check_ksp_type = KSPPREONLY;
5351         compute_eigs = PETSC_FALSE;
5352       } else {
5353         check_ksp_type = KSPGMRES;
5354         compute_eigs = PETSC_TRUE;
5355       }
5356       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
5357       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
5358       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
5359       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
5360       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
5361       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
5362       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
5363       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
5364       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
5365       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
5366       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
5367       /* create random vec */
5368       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
5369       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
5370       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
5371       if (CoarseNullSpace) {
5372         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
5373       }
5374       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5375       /* solve coarse problem */
5376       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
5377       if (CoarseNullSpace) {
5378         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
5379       }
5380       /* set eigenvalue estimation if preonly has not been requested */
5381       if (compute_eigs) {
5382         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
5383         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
5384         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
5385         lambda_max = eigs_r[neigs-1];
5386         lambda_min = eigs_r[0];
5387         if (pcbddc->use_coarse_estimates) {
5388           if (lambda_max>lambda_min) {
5389             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
5390             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
5391           }
5392         }
5393       }
5394 
5395       /* check coarse problem residual error */
5396       if (pcbddc->dbg_flag) {
5397         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
5398         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5399         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
5400         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
5401         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5402         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
5403         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
5404         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
5405         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
5406         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
5407         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
5408         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
5409         if (CoarseNullSpace) {
5410           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr);
5411         }
5412         if (compute_eigs) {
5413           PetscReal lambda_max_s,lambda_min_s;
5414           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
5415           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
5416           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
5417           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
5418           for (i=0;i<neigs;i++) {
5419             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
5420           }
5421         }
5422         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5423         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5424       }
5425       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
5426       if (compute_eigs) {
5427         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
5428         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
5429       }
5430     }
5431   }
5432   /* print additional info */
5433   if (pcbddc->dbg_flag) {
5434     /* waits until all processes reaches this point */
5435     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
5436     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
5437     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5438   }
5439 
5440   /* free memory */
5441   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5442   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
5443   PetscFunctionReturn(0);
5444 }
5445 
5446 #undef __FUNCT__
5447 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
5448 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
5449 {
5450   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5451   PC_IS*         pcis = (PC_IS*)pc->data;
5452   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5453   IS             subset,subset_mult,subset_n;
5454   PetscInt       local_size,coarse_size=0;
5455   PetscInt       *local_primal_indices=NULL;
5456   const PetscInt *t_local_primal_indices;
5457   PetscErrorCode ierr;
5458 
5459   PetscFunctionBegin;
5460   /* Compute global number of coarse dofs */
5461   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5462     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5463   }
5464   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5465   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5466   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5467   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5468   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5469   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5470   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5471   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5472   if (local_size != pcbddc->local_primal_size) {
5473     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5474   }
5475   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5476   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5477   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5478   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5479   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5480 
5481   /* check numbering */
5482   if (pcbddc->dbg_flag) {
5483     PetscScalar coarsesum,*array,*array2;
5484     PetscInt    i;
5485     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5486 
5487     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5488     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5489     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5490     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
5491     /* counter */
5492     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5493     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5494     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5495     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5496     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5497     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5498     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5499     for (i=0;i<pcbddc->local_primal_size;i++) {
5500       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5501     }
5502     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5503     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5504     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5505     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5506     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5507     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5508     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5509     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5510     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5511     for (i=0;i<pcis->n;i++) {
5512       if (array[i] != 0.0 && array[i] != array2[i]) {
5513         PetscInt owned = (PetscInt)PetscRealPart(array[i]);
5514         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
5515         set_error = PETSC_TRUE;
5516         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5517       }
5518     }
5519     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5520     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5521     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5522     for (i=0;i<pcis->n;i++) {
5523       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5524     }
5525     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5526     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5527     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5528     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5529     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5530     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5531     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5532       PetscInt *gidxs;
5533 
5534       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5535       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5536       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5537       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5538       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5539       for (i=0;i<pcbddc->local_primal_size;i++) {
5540         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5541       }
5542       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5543       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5544     }
5545     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5546     ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr);
5547     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5548   }
5549   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5550   /* get back data */
5551   *coarse_size_n = coarse_size;
5552   *local_primal_indices_n = local_primal_indices;
5553   PetscFunctionReturn(0);
5554 }
5555 
5556 #undef __FUNCT__
5557 #define __FUNCT__ "PCBDDCGlobalToLocal"
5558 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5559 {
5560   IS             localis_t;
5561   PetscInt       i,lsize,*idxs,n;
5562   PetscScalar    *vals;
5563   PetscErrorCode ierr;
5564 
5565   PetscFunctionBegin;
5566   /* get indices in local ordering exploiting local to global map */
5567   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5568   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5569   for (i=0;i<lsize;i++) vals[i] = 1.0;
5570   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5571   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5572   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5573   if (idxs) { /* multilevel guard */
5574     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5575   }
5576   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5577   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5578   ierr = PetscFree(vals);CHKERRQ(ierr);
5579   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5580   /* now compute set in local ordering */
5581   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5582   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5583   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5584   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5585   for (i=0,lsize=0;i<n;i++) {
5586     if (PetscRealPart(vals[i]) > 0.5) {
5587       lsize++;
5588     }
5589   }
5590   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5591   for (i=0,lsize=0;i<n;i++) {
5592     if (PetscRealPart(vals[i]) > 0.5) {
5593       idxs[lsize++] = i;
5594     }
5595   }
5596   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5597   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5598   *localis = localis_t;
5599   PetscFunctionReturn(0);
5600 }
5601 
5602 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5603 #undef __FUNCT__
5604 #define __FUNCT__ "PCBDDCMatMult_Private"
5605 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5606 {
5607   PCBDDCChange_ctx change_ctx;
5608   PetscErrorCode   ierr;
5609 
5610   PetscFunctionBegin;
5611   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5612   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5613   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5614   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5615   PetscFunctionReturn(0);
5616 }
5617 
5618 #undef __FUNCT__
5619 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5620 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5621 {
5622   PCBDDCChange_ctx change_ctx;
5623   PetscErrorCode   ierr;
5624 
5625   PetscFunctionBegin;
5626   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5627   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5628   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5629   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5630   PetscFunctionReturn(0);
5631 }
5632 
5633 #undef __FUNCT__
5634 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5635 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5636 {
5637   PC_IS               *pcis=(PC_IS*)pc->data;
5638   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5639   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5640   Mat                 S_j;
5641   PetscInt            *used_xadj,*used_adjncy;
5642   PetscBool           free_used_adj;
5643   PetscErrorCode      ierr;
5644 
5645   PetscFunctionBegin;
5646   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5647   free_used_adj = PETSC_FALSE;
5648   if (pcbddc->sub_schurs_layers == -1) {
5649     used_xadj = NULL;
5650     used_adjncy = NULL;
5651   } else {
5652     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5653       used_xadj = pcbddc->mat_graph->xadj;
5654       used_adjncy = pcbddc->mat_graph->adjncy;
5655     } else if (pcbddc->computed_rowadj) {
5656       used_xadj = pcbddc->mat_graph->xadj;
5657       used_adjncy = pcbddc->mat_graph->adjncy;
5658     } else {
5659       PetscBool      flg_row=PETSC_FALSE;
5660       const PetscInt *xadj,*adjncy;
5661       PetscInt       nvtxs;
5662 
5663       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5664       if (flg_row) {
5665         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5666         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5667         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5668         free_used_adj = PETSC_TRUE;
5669       } else {
5670         pcbddc->sub_schurs_layers = -1;
5671         used_xadj = NULL;
5672         used_adjncy = NULL;
5673       }
5674       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5675     }
5676   }
5677 
5678   /* setup sub_schurs data */
5679   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5680   if (!sub_schurs->use_mumps) {
5681     /* pcbddc->ksp_D up to date only if not using MUMPS */
5682     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5683     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5684   } else {
5685     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5686     PetscBool isseqaij;
5687     if (!pcbddc->use_vertices && reuse_solvers) {
5688       PetscInt n_vertices;
5689 
5690       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5691       reuse_solvers = (PetscBool)!n_vertices;
5692     }
5693     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5694     if (!isseqaij) {
5695       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5696       if (matis->A == pcbddc->local_mat) {
5697         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5698         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5699       } else {
5700         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5701       }
5702     }
5703     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5704   }
5705   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5706 
5707   /* free adjacency */
5708   if (free_used_adj) {
5709     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5710   }
5711   PetscFunctionReturn(0);
5712 }
5713 
5714 #undef __FUNCT__
5715 #define __FUNCT__ "PCBDDCInitSubSchurs"
5716 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5717 {
5718   PC_IS               *pcis=(PC_IS*)pc->data;
5719   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5720   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5721   PCBDDCGraph         graph;
5722   PetscErrorCode      ierr;
5723 
5724   PetscFunctionBegin;
5725   /* attach interface graph for determining subsets */
5726   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5727     IS       verticesIS,verticescomm;
5728     PetscInt vsize,*idxs;
5729 
5730     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5731     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5732     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5733     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5734     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5735     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5736     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5737     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5738     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5739     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5740     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5741 /*
5742     if (pcbddc->dbg_flag) {
5743       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5744     }
5745 */
5746   } else {
5747     graph = pcbddc->mat_graph;
5748   }
5749 
5750   /* sub_schurs init */
5751   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5752 
5753   /* free graph struct */
5754   if (pcbddc->sub_schurs_rebuild) {
5755     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5756   }
5757   PetscFunctionReturn(0);
5758 }
5759 
5760 #undef __FUNCT__
5761 #define __FUNCT__ "PCBDDCCheckOperator"
5762 PetscErrorCode PCBDDCCheckOperator(PC pc)
5763 {
5764   PC_IS               *pcis=(PC_IS*)pc->data;
5765   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5766   PetscErrorCode      ierr;
5767 
5768   PetscFunctionBegin;
5769   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5770     IS             zerodiag = NULL;
5771     Mat            S_j,B0_B=NULL;
5772     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5773     PetscScalar    *p0_check,*array,*array2;
5774     PetscReal      norm;
5775     PetscInt       i;
5776 
5777     /* B0 and B0_B */
5778     if (zerodiag) {
5779       IS       dummy;
5780 
5781       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
5782       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5783       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5784       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5785     }
5786     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5787     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5788     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5789     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5790     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5791     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5792     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5793     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5794     /* S_j */
5795     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5796     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5797 
5798     /* mimic vector in \widetilde{W}_\Gamma */
5799     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5800     /* continuous in primal space */
5801     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5802     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5803     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5804     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5805     ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr);
5806     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
5807     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5808     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5809     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5810     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5811     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5812     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5813     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5814     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5815 
5816     /* assemble rhs for coarse problem */
5817     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5818     /* local with Schur */
5819     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5820     if (zerodiag) {
5821       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5822       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
5823       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5824       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5825     }
5826     /* sum on primal nodes the local contributions */
5827     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5828     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5829     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5830     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5831     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5832     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5833     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5834     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5835     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5836     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5837     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5838     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5839     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5840     /* scale primal nodes (BDDC sums contibutions) */
5841     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5842     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5843     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5844     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5845     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5846     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5847     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5848     /* global: \widetilde{B0}_B w_\Gamma */
5849     if (zerodiag) {
5850       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5851       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5852       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
5853       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5854     }
5855     /* BDDC */
5856     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5857     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5858 
5859     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5860     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5861     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5862     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5863     for (i=0;i<pcbddc->benign_n;i++) {
5864       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
5865     }
5866     ierr = PetscFree(p0_check);CHKERRQ(ierr);
5867     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5868     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5869     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5870     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5871     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5872   }
5873   PetscFunctionReturn(0);
5874 }
5875