xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision a0b0af32e64c174988f089dab15f54288eaaeb81)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/ksp/pc/impls/bddc/bddc.h>
3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
8 
9 /* TODO: add reuse flag */
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqAIJCompress"
12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
13 {
14   Mat            Bt;
15   PetscScalar    *a,*bdata;
16   const PetscInt *ii,*ij;
17   PetscInt       m,n,i,nnz,*bii,*bij;
18   PetscBool      flg_row;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr);
23   ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
24   ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr);
25   nnz = n;
26   for (i=0;i<ii[n];i++) {
27     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
28   }
29   ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr);
30   ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr);
31   ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr);
32   nnz = 0;
33   bii[0] = 0;
34   for (i=0;i<n;i++) {
35     PetscInt j;
36     for (j=ii[i];j<ii[i+1];j++) {
37       PetscScalar entry = a[j];
38       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) {
39         bij[nnz] = ij[j];
40         bdata[nnz] = entry;
41         nnz++;
42       }
43     }
44     bii[i+1] = nnz;
45   }
46   ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr);
47   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr);
48   ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
49   {
50     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
51     b->free_a = PETSC_TRUE;
52     b->free_ij = PETSC_TRUE;
53   }
54   *B = Bt;
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDetectDisconnectedComponents"
60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[])
61 {
62   Mat                    B;
63   IS                     is_dummy,*cc_n;
64   ISLocalToGlobalMapping l2gmap_dummy;
65   PCBDDCGraph            graph;
66   PetscInt               i,n;
67   PetscInt               *xadj,*adjncy;
68   PetscInt               *xadj_filtered,*adjncy_filtered;
69   PetscBool              flg_row,isseqaij;
70   PetscErrorCode         ierr;
71 
72   PetscFunctionBegin;
73   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
74   if (!isseqaij && filter) {
75     PetscBool isseqdense;
76 
77     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr);
78     if (!isseqdense) {
79       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
80     } else { /* TODO: rectangular case and LDA */
81       PetscScalar *array;
82       PetscReal   chop=1.e-6;
83 
84       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
85       ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr);
86       ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr);
87       for (i=0;i<n;i++) {
88         PetscInt j;
89         for (j=i+1;j<n;j++) {
90           PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
91           if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
92           if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
93         }
94       }
95       ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr);
96       ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
97     }
98   } else {
99     B = A;
100   }
101   ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
102 
103   /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
104   if (filter) {
105     PetscScalar *data;
106     PetscInt    j,cum;
107 
108     ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr);
109     ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr);
110     cum = 0;
111     for (i=0;i<n;i++) {
112       PetscInt t;
113 
114       for (j=xadj[i];j<xadj[i+1];j++) {
115         if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
116           continue;
117         }
118         adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
119       }
120       t = xadj_filtered[i];
121       xadj_filtered[i] = cum;
122       cum += t;
123     }
124     ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr);
125   } else {
126     xadj_filtered = NULL;
127     adjncy_filtered = NULL;
128   }
129 
130   /* compute local connected components using PCBDDCGraph */
131   ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr);
132   ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
133   ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
134   ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
135   ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr);
136   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
137   if (xadj_filtered) {
138     graph->xadj = xadj_filtered;
139     graph->adjncy = adjncy_filtered;
140   } else {
141     graph->xadj = xadj;
142     graph->adjncy = adjncy;
143   }
144   ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
145   ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
146 
147   /* partial clean up */
148   ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr);
149   ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
150   if (A != B) {
151     ierr = MatDestroy(&B);CHKERRQ(ierr);
152   }
153 
154   /* get back data */
155   if (ncc) *ncc = graph->ncc;
156   if (cc) {
157     ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr);
158     for (i=0;i<graph->ncc;i++) {
159       ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr);
160     }
161     *cc = cc_n;
162   }
163   /* clean up graph */
164   graph->xadj = 0;
165   graph->adjncy = 0;
166   ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "PCBDDCBenignCheck"
172 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
173 {
174   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
175   PC_IS*         pcis = (PC_IS*)(pc->data);
176   IS             dirIS = NULL;
177   PetscInt       i;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
182   if (zerodiag) {
183     Mat            A;
184     Vec            vec3_N;
185     PetscScalar    *vals;
186     const PetscInt *idxs;
187     PetscInt       nz;
188 
189     /* p0 */
190     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
191     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
192     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
193     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
194     for (i=0;i<nz;i++) vals[i] = 1.;
195     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
196     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
197     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
198     /* v_I */
199     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
200     for (i=0;i<nz;i++) vals[i] = 0.;
201     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
202     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
203     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
204     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
205     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
206     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
207     if (dirIS) {
208       PetscInt n;
209 
210       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
211       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
212       for (i=0;i<n;i++) vals[i] = 0.;
213       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
214       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
215     }
216     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
217     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
218     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
219     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
220     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
221     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
222     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
223     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) {
224       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
225     }
226     ierr = PetscFree(vals);CHKERRQ(ierr);
227     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
228   }
229   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
230 
231   /* check PCBDDCBenignGetOrSetP0 */
232   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
233   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
234   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
235   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
236   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
237   for (i=0;i<pcbddc->benign_n;i++) {
238     if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) {
239       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr);
240     }
241   }
242   PetscFunctionReturn(0);
243 }
244 
245 #undef __FUNCT__
246 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
247 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
248 {
249   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
250   IS             pressures,zerodiag,*zerodiag_subs;
251   PetscInt       nz,n;
252   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag;
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
257   ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr);
258   pcbddc->benign_original_mat = pcbddc->local_mat;
259   /* if a local info on dofs is present, assumes the last field is represented by "pressures"
260      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
261      Checks if all the pressure dofs in each subdomain have a zero diagonal
262      If not, a change of basis on pressures is not needed
263      since the local Schur complements are SPD
264   */
265   has_null_pressures = PETSC_TRUE;
266   have_null = PETSC_TRUE;
267   if (pcbddc->n_ISForDofsLocal) {
268     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
269 
270     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
271     ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr);
272     ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
273     ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr);
274     ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
275     ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr);
276     if (!sorted) {
277       ierr = ISSort(pressures);CHKERRQ(ierr);
278     }
279   } else {
280     pressures = NULL;
281   }
282   ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr);
283   /* TODO: add check for shared dofs and raise error */
284   ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr);
285   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
286   if (!sorted) {
287     ierr = ISSort(zerodiag);CHKERRQ(ierr);
288   }
289   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
290   if (!nz) {
291     if (n) have_null = PETSC_FALSE;
292     has_null_pressures = PETSC_FALSE;
293     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
294   }
295   recompute_zerodiag = PETSC_FALSE;
296   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
297   zerodiag_subs = NULL;
298   pcbddc->benign_n = 0;
299   if (has_null_pressures) {
300     IS       *subs;
301     PetscInt nsubs,i;
302 
303     subs = pcbddc->local_subs;
304     nsubs = pcbddc->n_local_subs;
305     if (nsubs > 1) {
306       ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr);
307       for (i=0;i<nsubs;i++) {
308         ISLocalToGlobalMapping l2g;
309         IS                     t_zerodiag_subs;
310         PetscInt               nl;
311 
312         ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr);
313         ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr);
314         ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr);
315         if (nl) {
316           PetscBool valid = PETSC_TRUE;
317 
318           if (pressures) {
319             IS t_pressure_subs;
320             ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr);
321             ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr);
322             ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr);
323           }
324           if (valid) {
325             ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr);
326             pcbddc->benign_n++;
327           } else {
328             recompute_zerodiag = PETSC_TRUE;
329           }
330         }
331         ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr);
332         ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr);
333       }
334     } else { /* there's just one subdomain (or zero if they have not been detected */
335       PetscBool valid = PETSC_TRUE;
336       if (pressures) {
337         ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr);
338       }
339       if (valid) {
340         pcbddc->benign_n = 1;
341         ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr);
342         ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr);
343         zerodiag_subs[0] = zerodiag;
344       }
345     }
346   }
347 
348   if (!pcbddc->benign_n) {
349     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
350     recompute_zerodiag = PETSC_FALSE;
351     has_null_pressures = PETSC_FALSE;
352     have_null = PETSC_FALSE;
353   }
354 
355   /* final check for null pressures */
356   if (zerodiag && pressures) {
357     PetscInt nz,np;
358     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
359     ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr);
360     if (nz != np) have_null = PETSC_FALSE;
361   }
362 
363   if (recompute_zerodiag) {
364     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
365     if (pcbddc->benign_n == 1) {
366       ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr);
367       zerodiag = zerodiag_subs[0];
368     } else {
369       PetscInt i,nzn,*new_idxs;
370 
371       nzn = 0;
372       for (i=0;i<pcbddc->benign_n;i++) {
373         PetscInt ns;
374         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
375         nzn += ns;
376       }
377       ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr);
378       nzn = 0;
379       for (i=0;i<pcbddc->benign_n;i++) {
380         PetscInt ns,*idxs;
381         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
382         ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
383         ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr);
384         ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
385         nzn += ns;
386       }
387       ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr);
388       ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr);
389     }
390     have_null = PETSC_FALSE;
391   }
392 
393   if (has_null_pressures) {
394     IS             zerodiagc;
395     const PetscInt *idxs,*idxsc;
396     PetscInt       i,s,*nnz;
397     Mat            M;
398 
399     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
400     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
401     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
402     /* local change of basis for pressures */
403     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
404     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr);
405     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
406     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
407     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
408     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
409     for (i=0;i<pcbddc->benign_n;i++) {
410       PetscInt nzs,j;
411 
412       ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr);
413       ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
414       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
415       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
416       ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
417     }
418     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
419     ierr = PetscFree(nnz);CHKERRQ(ierr);
420     /* set identity on velocities */
421     for (i=0;i<n-nz;i++) {
422       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
423     }
424     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
425     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
426     ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr);
427     /* set change on pressures */
428     for (s=0;s<pcbddc->benign_n;s++) {
429       PetscScalar *array;
430       PetscInt    nzs;
431 
432       ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr);
433       ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
434       for (i=0;i<nzs-1;i++) {
435         PetscScalar vals[2];
436         PetscInt    cols[2];
437 
438         cols[0] = idxs[i];
439         cols[1] = idxs[nzs-1];
440         vals[0] = 1.;
441         vals[1] = 1.;
442         ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
443       }
444       ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr);
445       for (i=0;i<nzs-1;i++) array[i] = -1.;
446       array[nzs-1] = 1.;
447       ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
448       /* store local idxs for p0 */
449       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
450       ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
451       ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr);
452       ierr = PetscFree(array);CHKERRQ(ierr);
453     }
454     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
455     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
456     /* TODO: need optimization? */
457     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
458     ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
459     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
460     ierr = MatDestroy(&M);CHKERRQ(ierr);
461     /* store global idxs for p0 */
462     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
463   }
464   ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr);
465   ierr = ISDestroy(&pressures);CHKERRQ(ierr);
466 
467   /* determines if the coarse solver will be singular or not */
468   ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
469   *zerodiaglocal = zerodiag;
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
475 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
476 {
477   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
478   PetscErrorCode ierr;
479 
480   PetscFunctionBegin;
481   if (!pcbddc->benign_sf) {
482     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
483     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
484   }
485   if (get) { /* use SF to get values */
486     PetscScalar *array;
487 
488     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
489     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
490     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
491     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
492   } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */
493     ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
494     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
495     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
496   }
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
502 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
503 {
504   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
505   PetscErrorCode ierr;
506 
507   PetscFunctionBegin;
508   /* TODO: add error checking
509     - avoid nested pop (or push) calls.
510     - cannot push before pop.
511     - cannot call this if pcbddc->local_mat is NULL
512   */
513   if (!pcbddc->benign_n) {
514     PetscFunctionReturn(0);
515   }
516   if (pop) {
517     IS       is_p0;
518     MatReuse reuse;
519 
520     /* extract B_0 */
521     reuse = MAT_INITIAL_MATRIX;
522     if (pcbddc->benign_B0) {
523       reuse = MAT_REUSE_MATRIX;
524     }
525     ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr);
526     ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr);
527     /* remove rows and cols from local problem */
528     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
529     ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr);
530     ierr = ISDestroy(&is_p0);CHKERRQ(ierr);
531   } else { /* push */
532     PetscInt i;
533 
534     for (i=0;i<pcbddc->benign_n;i++) {
535       PetscScalar *B0_vals;
536       PetscInt    *B0_cols,B0_ncol;
537 
538       ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
539       ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
540       ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
541       ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr);
542       ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
543     }
544     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546   }
547   PetscFunctionReturn(0);
548 }
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "PCBDDCAdaptiveSelection"
552 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
553 {
554   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
555   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
556   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
557   PetscBLASInt    *B_iwork,*B_ifail;
558   PetscScalar     *work,lwork;
559   PetscScalar     *St,*S,*eigv;
560   PetscScalar     *Sarray,*Starray;
561   PetscReal       *eigs,thresh;
562   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
563   PetscBool       allocated_S_St;
564 #if defined(PETSC_USE_COMPLEX)
565   PetscReal       *rwork;
566 #endif
567   PetscErrorCode  ierr;
568 
569   PetscFunctionBegin;
570   if (!sub_schurs->use_mumps) {
571     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
572   }
573 
574   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
575     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
576   }
577 
578   if (pcbddc->dbg_flag) {
579     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
580     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
581     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
582     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
583   }
584 
585   if (pcbddc->dbg_flag) {
586     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
587   }
588 
589   /* max size of subsets */
590   mss = 0;
591   for (i=0;i<sub_schurs->n_subs;i++) {
592     PetscInt subset_size;
593 
594     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
595     mss = PetscMax(mss,subset_size);
596   }
597 
598   /* min/max and threshold */
599   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
600   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
601   nmax = PetscMax(nmin,nmax);
602   allocated_S_St = PETSC_FALSE;
603   if (nmin) {
604     allocated_S_St = PETSC_TRUE;
605   }
606 
607   /* allocate lapack workspace */
608   cum = cum2 = 0;
609   maxneigs = 0;
610   for (i=0;i<sub_schurs->n_subs;i++) {
611     PetscInt n,subset_size;
612 
613     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
614     n = PetscMin(subset_size,nmax);
615     cum += subset_size;
616     cum2 += subset_size*n;
617     maxneigs = PetscMax(maxneigs,n);
618   }
619   if (mss) {
620     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
621       PetscBLASInt B_itype = 1;
622       PetscBLASInt B_N = mss;
623       PetscReal    zero = 0.0;
624       PetscReal    eps = 0.0; /* dlamch? */
625 
626       B_lwork = -1;
627       S = NULL;
628       St = NULL;
629       eigs = NULL;
630       eigv = NULL;
631       B_iwork = NULL;
632       B_ifail = NULL;
633 #if defined(PETSC_USE_COMPLEX)
634       rwork = NULL;
635 #endif
636       thresh = 1.0;
637       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
638 #if defined(PETSC_USE_COMPLEX)
639       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
640 #else
641       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
642 #endif
643       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
644       ierr = PetscFPTrapPop();CHKERRQ(ierr);
645     } else {
646         /* TODO */
647     }
648   } else {
649     lwork = 0;
650   }
651 
652   nv = 0;
653   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
654     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
655   }
656   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
657   if (allocated_S_St) {
658     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
659   }
660   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
661 #if defined(PETSC_USE_COMPLEX)
662   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
663 #endif
664   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
665                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
666                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
667                       nv+cum,&pcbddc->adaptive_constraints_idxs,
668                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
669   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
670 
671   maxneigs = 0;
672   cum = cum2 = cumarray = 0;
673   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
674   pcbddc->adaptive_constraints_data_ptr[0] = 0;
675   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
676     const PetscInt *idxs;
677 
678     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
679     for (cum=0;cum<nv;cum++) {
680       pcbddc->adaptive_constraints_n[cum] = 1;
681       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
682       pcbddc->adaptive_constraints_data[cum] = 1.0;
683       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
684       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
685     }
686     cum2 = cum;
687     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
688   }
689 
690   if (mss) { /* multilevel */
691     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
692     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
693   }
694 
695   for (i=0;i<sub_schurs->n_subs;i++) {
696 
697     const PetscInt *idxs;
698     PetscReal      infty = PETSC_MAX_REAL;
699     PetscInt       j,subset_size,eigs_start = 0;
700     PetscBLASInt   B_N;
701     PetscBool      same_data = PETSC_FALSE;
702 
703     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
704     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
705     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
706       if (sub_schurs->is_hermitian) {
707         PetscInt j,k;
708         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
709           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
710           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
711         }
712         for (j=0;j<subset_size;j++) {
713           for (k=j;k<subset_size;k++) {
714             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
715             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
716           }
717         }
718       } else {
719         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
720         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
721       }
722     } else {
723       S = Sarray + cumarray;
724       St = Starray + cumarray;
725     }
726 
727     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
728     /* see if we can save some work */
729     if (sub_schurs->n_subs == 1) {
730       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
731     }
732 
733     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
734       B_neigs = 0;
735     } else {
736       /* Threshold: this is an heuristic for edges */
737       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
738       if (nmin) {
739         Mat SM,StM;
740         PetscInt j,k,nccs,nccst;
741 
742         for (j=0;j<subset_size;j++) {
743           for (k=j;k<subset_size;k++) {
744             S [k*subset_size+j] = S [j*subset_size+k];
745             St[k*subset_size+j] = St[j*subset_size+k];
746           }
747         }
748         ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,S,&SM);CHKERRQ(ierr);
749         ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,St,&StM);CHKERRQ(ierr);
750         ierr = MatDetectDisconnectedComponents(SM,PETSC_TRUE,&nccs,NULL);CHKERRQ(ierr);
751         ierr = MatDetectDisconnectedComponents(StM,PETSC_TRUE,&nccst,NULL);CHKERRQ(ierr);
752         if (nccs != 1 || nccst != 1) {
753           PetscPrintf(PETSC_COMM_SELF,"[%d] Found disc %d %d (size %d)\n",PetscGlobalRank,nccs,nccst,subset_size);
754         }
755         ierr = MatDestroy(&SM);CHKERRQ(ierr);
756         ierr = MatDestroy(&StM);CHKERRQ(ierr);
757       }
758 
759       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
760         PetscBLASInt B_itype = 1;
761         PetscBLASInt B_IL, B_IU;
762         PetscReal    eps = -1.0; /* dlamch? */
763         PetscInt     nmin_s;
764 
765         if (pcbddc->dbg_flag) {
766           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
767         }
768 
769         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
770         if (thresh > 1.+PETSC_SMALL) {
771 
772           /* ask for eigenvalues larger than thresh */
773 #if defined(PETSC_USE_COMPLEX)
774           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
775 #else
776           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
777 #endif
778         } else {
779           B_IU = PetscMax(1,PetscMin(B_N,nmax));
780           B_IL = 1;
781 #if defined(PETSC_USE_COMPLEX)
782           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
783 #else
784           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
785 #endif
786         }
787         ierr = PetscFPTrapPop();CHKERRQ(ierr);
788         if (B_ierr) {
789           if (B_ierr < 0 ) {
790             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
791           } else if (B_ierr <= B_N) {
792             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
793           } else {
794             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
795           }
796         }
797 
798         if (B_neigs > nmax) {
799           if (pcbddc->dbg_flag) {
800             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
801           }
802           eigs_start = B_neigs -nmax;
803           B_neigs = nmax;
804         }
805 
806         nmin_s = PetscMin(nmin,B_N);
807         if (B_neigs < nmin_s) {
808           PetscBLASInt B_neigs2;
809 
810           B_IU = B_N - B_neigs;
811           B_IL = B_N - nmin_s + 1;
812           if (pcbddc->dbg_flag) {
813             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
814           }
815           if (sub_schurs->is_hermitian) {
816             PetscInt j;
817             for (j=0;j<subset_size;j++) {
818               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
819             }
820             for (j=0;j<subset_size;j++) {
821               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
822             }
823           } else {
824             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
825             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
826           }
827           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
828 #if defined(PETSC_USE_COMPLEX)
829           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
830 #else
831           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
832 #endif
833           ierr = PetscFPTrapPop();CHKERRQ(ierr);
834           B_neigs += B_neigs2;
835         }
836         if (B_ierr) {
837           if (B_ierr < 0 ) {
838             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
839           } else if (B_ierr <= B_N) {
840             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
841           } else {
842             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
843           }
844         }
845         if (pcbddc->dbg_flag) {
846           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
847           for (j=0;j<B_neigs;j++) {
848             if (eigs[j] == 0.0) {
849               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
850             } else {
851               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
852             }
853           }
854         }
855       } else {
856           /* TODO */
857       }
858     }
859     maxneigs = PetscMax(B_neigs,maxneigs);
860     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
861     if (B_neigs) {
862       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
863 
864       if (pcbddc->dbg_flag > 1) {
865         PetscInt ii;
866         for (ii=0;ii<B_neigs;ii++) {
867           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
868           for (j=0;j<B_N;j++) {
869 #if defined(PETSC_USE_COMPLEX)
870             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
871             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
872             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
873 #else
874             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
875 #endif
876           }
877         }
878       }
879 #if 0
880       for (j=0;j<B_neigs;j++) {
881         PetscBLASInt Blas_N,Blas_one = 1.0;
882         PetscScalar norm;
883         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
884         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
885                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
886         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
887           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
888         } else {
889           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
890         }
891         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
892       }
893 #endif
894       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
895       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
896       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
897       cum++;
898     }
899     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
900     /* shift for next computation */
901     cumarray += subset_size*subset_size;
902   }
903   if (pcbddc->dbg_flag) {
904     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
905   }
906 
907   if (mss) {
908     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
909     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
910     /* destroy matrices (junk) */
911     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
912     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
913   }
914   if (allocated_S_St) {
915     ierr = PetscFree2(S,St);CHKERRQ(ierr);
916   }
917   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
918 #if defined(PETSC_USE_COMPLEX)
919   ierr = PetscFree(rwork);CHKERRQ(ierr);
920 #endif
921   if (pcbddc->dbg_flag) {
922     PetscInt maxneigs_r;
923     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
924     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
925   }
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "PCBDDCSetUpSolvers"
931 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
932 {
933   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
934   PetscScalar    *coarse_submat_vals;
935   PetscErrorCode ierr;
936 
937   PetscFunctionBegin;
938   /* Setup local scatters R_to_B and (optionally) R_to_D */
939   /* PCBDDCSetUpLocalWorkVectors should be called first! */
940   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
941 
942   /* Setup local neumann solver ksp_R */
943   /* PCBDDCSetUpLocalScatters should be called first! */
944   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
945 
946   /* Change global null space passed in by the user if change of basis has been requested */
947   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
948     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
949   }
950 
951   /*
952      Setup local correction and local part of coarse basis.
953      Gives back the dense local part of the coarse matrix in column major ordering
954   */
955   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
956 
957   /* Compute total number of coarse nodes and setup coarse solver */
958   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
959 
960   /* free */
961   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "PCBDDCResetCustomization"
967 PetscErrorCode PCBDDCResetCustomization(PC pc)
968 {
969   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
974   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
975   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
976   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
977   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
978   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
979   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
980   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
981   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
982   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
983   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "PCBDDCResetTopography"
989 PetscErrorCode PCBDDCResetTopography(PC pc)
990 {
991   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
992   PetscInt       i;
993   PetscErrorCode ierr;
994 
995   PetscFunctionBegin;
996   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
997   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
998   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
999   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
1000   for (i=0;i<pcbddc->n_local_subs;i++) {
1001     ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr);
1002   }
1003   ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr);
1004   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "PCBDDCResetSolvers"
1010 PetscErrorCode PCBDDCResetSolvers(PC pc)
1011 {
1012   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1013   PetscScalar    *array;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
1018   if (pcbddc->coarse_phi_B) {
1019     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
1020     ierr = PetscFree(array);CHKERRQ(ierr);
1021   }
1022   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1023   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1024   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1025   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1026   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1027   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1028   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1029   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1030   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1031   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1032   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1033   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1034   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1035   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
1036   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
1037   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
1038   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
1039   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1040   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1041   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1042   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
1043   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
1044   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
1045   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
1046   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
1047   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
1048   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
1049   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
1050   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
1051   ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 #undef __FUNCT__
1056 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
1057 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
1058 {
1059   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1060   PC_IS          *pcis = (PC_IS*)pc->data;
1061   VecType        impVecType;
1062   PetscInt       n_constraints,n_R,old_size;
1063   PetscErrorCode ierr;
1064 
1065   PetscFunctionBegin;
1066   if (!pcbddc->ConstraintMatrix) {
1067     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
1068   }
1069   /* get sizes */
1070   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
1071   n_R = pcis->n - pcbddc->n_vertices;
1072   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
1073   /* local work vectors (try to avoid unneeded work)*/
1074   /* R nodes */
1075   old_size = -1;
1076   if (pcbddc->vec1_R) {
1077     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
1078   }
1079   if (n_R != old_size) {
1080     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1081     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1082     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
1083     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
1084     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
1085     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
1086   }
1087   /* local primal dofs */
1088   old_size = -1;
1089   if (pcbddc->vec1_P) {
1090     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
1091   }
1092   if (pcbddc->local_primal_size != old_size) {
1093     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1094     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
1095     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
1096     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
1097   }
1098   /* local explicit constraints */
1099   old_size = -1;
1100   if (pcbddc->vec1_C) {
1101     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
1102   }
1103   if (n_constraints && n_constraints != old_size) {
1104     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1105     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
1106     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
1107     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
1108   }
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "PCBDDCSetUpCorrection"
1114 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
1115 {
1116   PetscErrorCode  ierr;
1117   /* pointers to pcis and pcbddc */
1118   PC_IS*          pcis = (PC_IS*)pc->data;
1119   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1120   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1121   /* submatrices of local problem */
1122   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
1123   /* submatrices of benign trick */
1124   Mat             B0_V = NULL;
1125   /* submatrices of local coarse problem */
1126   Mat             S_VV,S_CV,S_VC,S_CC;
1127   /* working matrices */
1128   Mat             C_CR;
1129   /* additional working stuff */
1130   PC              pc_R;
1131   Mat             F;
1132   PetscBool       isLU,isCHOL,isILU;
1133 
1134   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
1135   PetscScalar     *work;
1136   PetscInt        *idx_V_B;
1137   PetscInt        n,n_vertices,n_constraints,*p0_lidx_I;
1138   PetscInt        i,n_R,n_D,n_B;
1139   PetscBool       unsymmetric_check;
1140   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
1141   MatType         impMatType;
1142   /* some shortcuts to scalars */
1143   PetscScalar     one=1.0,m_one=-1.0;
1144 
1145   PetscFunctionBegin;
1146   n_vertices = pcbddc->n_vertices;
1147   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
1148   /* Set Non-overlapping dimensions */
1149   n_B = pcis->n_B;
1150   n_D = pcis->n - n_B;
1151   n_R = pcis->n - n_vertices;
1152 
1153   /* Set types for local objects needed by BDDC precondtioner */
1154   impMatType = MATSEQDENSE;
1155 
1156   /* vertices in boundary numbering */
1157   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
1158   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
1159   if (i != n_vertices) {
1160     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
1161   }
1162 
1163   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
1164   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
1165   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
1166   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
1167   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
1168   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
1169   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
1170   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
1171   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
1172   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
1173 
1174   unsymmetric_check = PETSC_FALSE;
1175   /* allocate workspace */
1176   n = 0;
1177   if (n_constraints) {
1178     n += n_R*n_constraints;
1179   }
1180   if (n_vertices) {
1181     n = PetscMax(2*n_R*n_vertices,n);
1182     n = PetscMax((n_R+n_B)*n_vertices,n);
1183   }
1184   if (!pcbddc->symmetric_primal) {
1185     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
1186     unsymmetric_check = PETSC_TRUE;
1187   }
1188   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1189 
1190   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
1191   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
1192   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
1193   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
1194   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
1195   if (isLU || isILU || isCHOL) {
1196     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
1197   } else if (sub_schurs->reuse_mumps) {
1198     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1199     MatFactorType type;
1200 
1201     F = reuse_mumps->F;
1202     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
1203     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
1204   } else {
1205     F = NULL;
1206   }
1207 
1208   /* Precompute stuffs needed for preprocessing and application of BDDC*/
1209   if (n_constraints) {
1210     Mat         M1,M2,M3;
1211     Mat         auxmat;
1212     IS          is_aux;
1213     PetscScalar *array,*array2;
1214 
1215     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1216     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1217 
1218     /* Extract constraints on R nodes: C_{CR}  */
1219     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
1220     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
1221     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
1222 
1223     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
1224     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
1225     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1226     for (i=0;i<n_constraints;i++) {
1227       const PetscScalar *row_cmat_values;
1228       const PetscInt    *row_cmat_indices;
1229       PetscInt          size_of_constraint,j;
1230 
1231       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1232       for (j=0;j<size_of_constraint;j++) {
1233         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
1234       }
1235       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1236     }
1237     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
1238     if (F) {
1239       Mat B;
1240 
1241       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1242       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
1243       ierr = MatDestroy(&B);CHKERRQ(ierr);
1244     } else {
1245       PetscScalar *marr;
1246 
1247       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1248       for (i=0;i<n_constraints;i++) {
1249         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1250         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
1251         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1252         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1253         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1254       }
1255       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1256     }
1257     if (!pcbddc->switch_static) {
1258       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1259       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1260       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1261       for (i=0;i<n_constraints;i++) {
1262         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
1263         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
1264         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1267         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1268       }
1269       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1270       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1271       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1272     } else {
1273       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
1274       pcbddc->local_auxmat2 = local_auxmat2_R;
1275       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1276     }
1277     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1278     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
1279     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
1280     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
1281     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
1282     if (isCHOL) {
1283       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
1284     } else {
1285       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
1286     }
1287     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
1288     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
1289     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
1290     ierr = MatDestroy(&M2);CHKERRQ(ierr);
1291     ierr = MatDestroy(&M3);CHKERRQ(ierr);
1292     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
1293     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
1294     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
1295     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
1296     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1297   }
1298   /* Get submatrices from subdomain matrix */
1299   if (pcbddc->benign_n) {
1300     IS        dummy;
1301     Mat       B0_R;
1302     PetscReal norm;
1303 
1304     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1305     ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
1306     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
1307     if (norm > PETSC_SMALL) {
1308       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
1309     }
1310     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1311     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1312   }
1313 
1314   if (n_vertices) {
1315     IS is_aux;
1316 
1317     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
1318       IS tis;
1319 
1320       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
1321       ierr = ISSort(tis);CHKERRQ(ierr);
1322       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
1323       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1324     } else {
1325       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
1326     }
1327     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
1328     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
1329     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
1330     if (pcbddc->benign_n) {
1331       IS dummy;
1332 
1333       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1334       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1335       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1336     }
1337     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1338   }
1339 
1340   /* Matrix of coarse basis functions (local) */
1341   if (pcbddc->coarse_phi_B) {
1342     PetscInt on_B,on_primal,on_D=n_D;
1343     if (pcbddc->coarse_phi_D) {
1344       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1345     }
1346     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1347     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1348       PetscScalar *marray;
1349 
1350       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1351       ierr = PetscFree(marray);CHKERRQ(ierr);
1352       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1353       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1354       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1355       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1356     }
1357   }
1358 
1359   if (!pcbddc->coarse_phi_B) {
1360     PetscScalar *marray;
1361 
1362     n = n_B*pcbddc->local_primal_size;
1363     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1364       n += n_D*pcbddc->local_primal_size;
1365     }
1366     if (!pcbddc->symmetric_primal) {
1367       n *= 2;
1368     }
1369     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1370     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1371     n = n_B*pcbddc->local_primal_size;
1372     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1373       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1374       n += n_D*pcbddc->local_primal_size;
1375     }
1376     if (!pcbddc->symmetric_primal) {
1377       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1378       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1379         n = n_B*pcbddc->local_primal_size;
1380         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1381       }
1382     } else {
1383       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1384       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1385       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1386         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1387         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1388       }
1389     }
1390   }
1391 
1392   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1393   p0_lidx_I = NULL;
1394   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1395     const PetscInt *idxs;
1396 
1397     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1398     ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr);
1399     for (i=0;i<pcbddc->benign_n;i++) {
1400       ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr);
1401     }
1402     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1403   }
1404 
1405   /* vertices */
1406   if (n_vertices) {
1407 
1408     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1409 
1410     if (n_R) {
1411       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1412       PetscBLASInt B_N,B_one = 1;
1413       PetscScalar  *x,*y;
1414       PetscBool    isseqaij;
1415 
1416       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1417       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1418       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1419       if (F) { /* TODO could be optimized for symmetric problems */
1420         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1421       } else {
1422         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1423         for (i=0;i<n_vertices;i++) {
1424           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
1425           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1426           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1427           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1428           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1429         }
1430         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1431       }
1432       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1433       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
1434       if (n_constraints) {
1435         Mat B;
1436 
1437         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1438         for (i=0;i<n_vertices;i++) {
1439           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1440           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
1441           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1442           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1444           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1445         }
1446         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1447         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1448         ierr = MatDestroy(&B);CHKERRQ(ierr);
1449         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1450         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1451         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1452         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
1453         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
1454         ierr = MatDestroy(&B);CHKERRQ(ierr);
1455       }
1456       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1457       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1458         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1459       }
1460       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1461       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1462       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1463       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1464       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1465       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1466       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1467       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1468       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1469       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1470     } else {
1471       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1472     }
1473     for (i=0;i<pcbddc->benign_n;i++) {
1474       const PetscScalar *vals;
1475       const PetscInt    *idxs;
1476       PetscInt          n,j,primal_idx;
1477 
1478       ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1479       primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i;
1480       for (j=0;j<n;j++) {
1481         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j];
1482         coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j];
1483       }
1484       ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1485     }
1486     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1487 
1488     /* coarse basis functions */
1489     for (i=0;i<n_vertices;i++) {
1490       PetscScalar *y;
1491 
1492       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1493       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1494       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1495       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497       y[n_B*i+idx_V_B[i]] = 1.0;
1498       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1499       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1500 
1501       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1502         PetscInt j;
1503 
1504         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1505         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1506         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1507         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1508         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1509         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1510         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1511       }
1512       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1513     }
1514     /* if n_R == 0 the object is not destroyed */
1515     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1516   }
1517 
1518   if (n_constraints) {
1519     Mat B;
1520 
1521     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1522     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1523     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1524     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1525     if (n_vertices) {
1526       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1527         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1528       } else {
1529         Mat S_VCt;
1530 
1531         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1532         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1533         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1534       }
1535     }
1536     ierr = MatDestroy(&B);CHKERRQ(ierr);
1537     /* coarse basis functions */
1538     for (i=0;i<n_constraints;i++) {
1539       PetscScalar *y;
1540 
1541       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1542       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1543       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1544       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1546       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1547       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1548       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1549         PetscInt j;
1550 
1551         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1552         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1553         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1556         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1557         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1558       }
1559       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1560     }
1561   }
1562   if (n_constraints) {
1563     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1564   }
1565   ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr);
1566   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1567 
1568   /* compute other basis functions for non-symmetric problems */
1569   if (!pcbddc->symmetric_primal) {
1570 
1571     if (n_constraints) {
1572       Mat S_CCT,B_C;
1573 
1574       /* this is a lazy thing */
1575       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1576       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1577       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1578       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1579       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1580       if (n_vertices) {
1581         Mat B_V,S_VCT;
1582 
1583         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1584         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1585         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1586         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1587         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1588       }
1589       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1590     } else { /* if there are no constraints, reset work */
1591       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1592     }
1593     if (n_vertices && n_R) {
1594       Mat          A_VRT;
1595       PetscScalar  *marray;
1596       PetscBLASInt B_N,B_one = 1;
1597 
1598       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1599       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1600       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1601       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1602       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1603       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1604       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1605     }
1606 
1607     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1608       for (i=0;i<pcbddc->local_primal_size;i++) {
1609         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1610         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1611         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1612         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1613         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1614       }
1615     } else {
1616       for (i=0;i<pcbddc->local_primal_size;i++) {
1617         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1618         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1619         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1620         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1621         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1622       }
1623     }
1624     /* coarse basis functions */
1625     for (i=0;i<pcbddc->local_primal_size;i++) {
1626       PetscScalar *y;
1627 
1628       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1629       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1630       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1631       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633       if (i<n_vertices) {
1634         y[n_B*i+idx_V_B[i]] = 1.0;
1635       }
1636       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1637       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1638 
1639       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1640         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1641         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1642         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1643         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1645         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1646       }
1647       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1648     }
1649   }
1650   /* free memory */
1651   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1652   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1653   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1654   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1655   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1656   ierr = PetscFree(work);CHKERRQ(ierr);
1657   if (n_vertices) {
1658     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1659   }
1660   if (n_constraints) {
1661     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1662   }
1663   /* Checking coarse_sub_mat and coarse basis functios */
1664   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1665   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1666   if (pcbddc->dbg_flag) {
1667     Mat         coarse_sub_mat;
1668     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1669     Mat         coarse_phi_D,coarse_phi_B;
1670     Mat         coarse_psi_D,coarse_psi_B;
1671     Mat         A_II,A_BB,A_IB,A_BI;
1672     Mat         C_B,CPHI;
1673     IS          is_dummy;
1674     Vec         mones;
1675     MatType     checkmattype=MATSEQAIJ;
1676     PetscReal   real_value;
1677 
1678     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1679     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1680     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1681     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1682     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1683     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1684     if (unsymmetric_check) {
1685       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1686       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1687     }
1688     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1689 
1690     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1691     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1692     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1693     if (unsymmetric_check) {
1694       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1695       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1696       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1697       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1698       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1699       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1700       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1701       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1702       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1703       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1704       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1705       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1706     } else {
1707       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1708       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1709       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1710       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1711       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1712       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1713       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1714       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1715     }
1716     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1717     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1718     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1719     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1720     if (pcbddc->benign_n) {
1721       Mat         B0_I,B0_B,B0_BPHI,B0_IPHI;
1722       PetscScalar *data,*data2;
1723       PetscInt    j;
1724 
1725       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
1726       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1727       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1728       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1729       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1730       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1731       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1732       for (j=0;j<pcbddc->benign_n;j++) {
1733         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
1734         for (i=0;i<pcbddc->local_primal_size;i++) {
1735           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
1736           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
1737         }
1738       }
1739       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1740       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1741       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1742       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1743       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1744       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1745       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1746       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1747       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1748     }
1749 #if 0
1750   {
1751     PetscViewer viewer;
1752     char filename[256];
1753     sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank);
1754     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1755     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1756     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1757     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1758   }
1759 #endif
1760     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1761     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1762     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1763     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1764 
1765     /* check constraints */
1766     if (!pcbddc->benign_n) { /* TODO: add benign case */
1767       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1768       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1769       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1770       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1771       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1772       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1773       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1774       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1775       if (unsymmetric_check) {
1776         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1777         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1778         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1779         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1780         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1781       }
1782       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1783       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1784       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1785       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1786     }
1787     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1788     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1789     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1790     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1791     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1792     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1793     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1794     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1795     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1796     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1797     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1798     if (unsymmetric_check) {
1799       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1800       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1801     }
1802     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1803   }
1804   /* get back data */
1805   *coarse_submat_vals_n = coarse_submat_vals;
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1811 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1812 {
1813   Mat            *work_mat;
1814   IS             isrow_s,iscol_s;
1815   PetscBool      rsorted,csorted;
1816   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1821   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1822   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1823   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1824 
1825   if (!rsorted) {
1826     const PetscInt *idxs;
1827     PetscInt *idxs_sorted,i;
1828 
1829     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1830     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1831     for (i=0;i<rsize;i++) {
1832       idxs_perm_r[i] = i;
1833     }
1834     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1835     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1836     for (i=0;i<rsize;i++) {
1837       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1838     }
1839     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1840     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1841   } else {
1842     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1843     isrow_s = isrow;
1844   }
1845 
1846   if (!csorted) {
1847     if (isrow == iscol) {
1848       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1849       iscol_s = isrow_s;
1850     } else {
1851       const PetscInt *idxs;
1852       PetscInt *idxs_sorted,i;
1853 
1854       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1855       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1856       for (i=0;i<csize;i++) {
1857         idxs_perm_c[i] = i;
1858       }
1859       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1860       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1861       for (i=0;i<csize;i++) {
1862         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1863       }
1864       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1865       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1866     }
1867   } else {
1868     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1869     iscol_s = iscol;
1870   }
1871 
1872   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1873 
1874   if (!rsorted || !csorted) {
1875     Mat      new_mat;
1876     IS       is_perm_r,is_perm_c;
1877 
1878     if (!rsorted) {
1879       PetscInt *idxs_r,i;
1880       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1881       for (i=0;i<rsize;i++) {
1882         idxs_r[idxs_perm_r[i]] = i;
1883       }
1884       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1885       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1886     } else {
1887       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1888     }
1889     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1890 
1891     if (!csorted) {
1892       if (isrow_s == iscol_s) {
1893         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1894         is_perm_c = is_perm_r;
1895       } else {
1896         PetscInt *idxs_c,i;
1897         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1898         for (i=0;i<csize;i++) {
1899           idxs_c[idxs_perm_c[i]] = i;
1900         }
1901         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1902         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1903       }
1904     } else {
1905       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1906     }
1907     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1908 
1909     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1910     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1911     work_mat[0] = new_mat;
1912     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1913     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1914   }
1915 
1916   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1917   *B = work_mat[0];
1918   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1919   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1920   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 #undef __FUNCT__
1925 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1926 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1927 {
1928   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1929   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1930   Mat            new_mat;
1931   IS             is_local,is_global;
1932   PetscInt       local_size;
1933   PetscBool      isseqaij;
1934   PetscErrorCode ierr;
1935 
1936   PetscFunctionBegin;
1937   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1938   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1939   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1940   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1941   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1942   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1943   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1944 
1945   /* check */
1946   if (pcbddc->dbg_flag) {
1947     Vec       x,x_change;
1948     PetscReal error;
1949 
1950     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1951     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1952     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1953     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1954     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1955     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1956     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1957     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1958     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1959     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1960     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1961     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1962     ierr = VecDestroy(&x);CHKERRQ(ierr);
1963     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1964   }
1965 
1966   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1967   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1968   if (isseqaij) {
1969     Mat M;
1970 
1971     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
1972     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
1973     ierr = MatDestroy(&M);CHKERRQ(ierr);
1974   } else {
1975     Mat work_mat,M;
1976 
1977     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1978     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
1979     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
1980     ierr = MatDestroy(&M);CHKERRQ(ierr);
1981     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1982   }
1983   if (matis->A->symmetric_set) {
1984     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1985 #if !defined(PETSC_USE_COMPLEX)
1986     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1987 #endif
1988   }
1989   /*
1990   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1991   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1992   */
1993   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1999 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
2000 {
2001   PC_IS*          pcis = (PC_IS*)(pc->data);
2002   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
2003   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2004   PetscInt        *idx_R_local=NULL;
2005   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
2006   PetscInt        vbs,bs;
2007   PetscBT         bitmask=NULL;
2008   PetscErrorCode  ierr;
2009 
2010   PetscFunctionBegin;
2011   /*
2012     No need to setup local scatters if
2013       - primal space is unchanged
2014         AND
2015       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
2016         AND
2017       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
2018   */
2019   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
2020     PetscFunctionReturn(0);
2021   }
2022   /* destroy old objects */
2023   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
2024   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
2025   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
2026   /* Set Non-overlapping dimensions */
2027   n_B = pcis->n_B;
2028   n_D = pcis->n - n_B;
2029   n_vertices = pcbddc->n_vertices;
2030 
2031   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
2032 
2033   /* create auxiliary bitmask and allocate workspace */
2034   if (!sub_schurs->reuse_mumps) {
2035     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
2036     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
2037     for (i=0;i<n_vertices;i++) {
2038       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
2039     }
2040 
2041     for (i=0, n_R=0; i<pcis->n; i++) {
2042       if (!PetscBTLookup(bitmask,i)) {
2043         idx_R_local[n_R++] = i;
2044       }
2045     }
2046   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
2047     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2048 
2049     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2050     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
2051   }
2052 
2053   /* Block code */
2054   vbs = 1;
2055   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
2056   if (bs>1 && !(n_vertices%bs)) {
2057     PetscBool is_blocked = PETSC_TRUE;
2058     PetscInt  *vary;
2059     if (!sub_schurs->reuse_mumps) {
2060       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
2061       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
2062       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
2063       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
2064       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
2065       for (i=0; i<pcis->n/bs; i++) {
2066         if (vary[i]!=0 && vary[i]!=bs) {
2067           is_blocked = PETSC_FALSE;
2068           break;
2069         }
2070       }
2071       ierr = PetscFree(vary);CHKERRQ(ierr);
2072     } else {
2073       /* Verify directly the R set */
2074       for (i=0; i<n_R/bs; i++) {
2075         PetscInt j,node=idx_R_local[bs*i];
2076         for (j=1; j<bs; j++) {
2077           if (node != idx_R_local[bs*i+j]-j) {
2078             is_blocked = PETSC_FALSE;
2079             break;
2080           }
2081         }
2082       }
2083     }
2084     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
2085       vbs = bs;
2086       for (i=0;i<n_R/vbs;i++) {
2087         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
2088       }
2089     }
2090   }
2091   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
2092   if (sub_schurs->reuse_mumps) {
2093     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2094 
2095     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2096     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
2097     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
2098     reuse_mumps->is_R = pcbddc->is_R_local;
2099   } else {
2100     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
2101   }
2102 
2103   /* print some info if requested */
2104   if (pcbddc->dbg_flag) {
2105     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2106     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2107     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2108     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
2109     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
2110     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr);
2111     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2112   }
2113 
2114   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
2115   if (!sub_schurs->reuse_mumps) {
2116     IS       is_aux1,is_aux2;
2117     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
2118 
2119     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2120     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
2121     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
2122     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2123     for (i=0; i<n_D; i++) {
2124       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
2125     }
2126     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2127     for (i=0, j=0; i<n_R; i++) {
2128       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
2129         aux_array1[j++] = i;
2130       }
2131     }
2132     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2133     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2134     for (i=0, j=0; i<n_B; i++) {
2135       if (!PetscBTLookup(bitmask,is_indices[i])) {
2136         aux_array2[j++] = i;
2137       }
2138     }
2139     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2140     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
2141     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
2142     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2143     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
2144 
2145     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2146       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
2147       for (i=0, j=0; i<n_R; i++) {
2148         if (PetscBTLookup(bitmask,idx_R_local[i])) {
2149           aux_array1[j++] = i;
2150         }
2151       }
2152       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2153       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2154       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2155     }
2156     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
2157     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2158   } else {
2159     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2160     IS               tis;
2161     PetscInt         schur_size;
2162 
2163     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
2164     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
2165     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
2166     ierr = ISDestroy(&tis);CHKERRQ(ierr);
2167     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2168       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
2169       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2170       ierr = ISDestroy(&tis);CHKERRQ(ierr);
2171     }
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
2179 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
2180 {
2181   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2182   PC_IS          *pcis = (PC_IS*)pc->data;
2183   PC             pc_temp;
2184   Mat            A_RR;
2185   MatReuse       reuse;
2186   PetscScalar    m_one = -1.0;
2187   PetscReal      value;
2188   PetscInt       n_D,n_R;
2189   PetscBool      use_exact,use_exact_reduced,issbaij;
2190   PetscErrorCode ierr;
2191   /* prefixes stuff */
2192   char           dir_prefix[256],neu_prefix[256],str_level[16];
2193   size_t         len;
2194 
2195   PetscFunctionBegin;
2196 
2197   /* compute prefixes */
2198   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
2199   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
2200   if (!pcbddc->current_level) {
2201     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2202     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2203     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2204     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2205   } else {
2206     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2207     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
2208     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2209     len -= 15; /* remove "pc_bddc_coarse_" */
2210     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
2211     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
2212     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2213     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2214     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2215     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2216     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
2217     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
2218   }
2219 
2220   /* DIRICHLET PROBLEM */
2221   if (dirichlet) {
2222     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2223     if (pcbddc->local_mat->symmetric_set) {
2224       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2225     }
2226     /* Matrix for Dirichlet problem is pcis->A_II */
2227     n_D = pcis->n - pcis->n_B;
2228     if (!pcbddc->ksp_D) { /* create object if not yet build */
2229       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
2230       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
2231       /* default */
2232       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
2233       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
2234       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2235       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2236       if (issbaij) {
2237         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2238       } else {
2239         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2240       }
2241       /* Allow user's customization */
2242       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
2243       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2244     }
2245     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
2246     if (sub_schurs->reuse_mumps) {
2247       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2248 
2249       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
2250     }
2251     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2252     if (!n_D) {
2253       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2254       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2255     }
2256     /* Set Up KSP for Dirichlet problem of BDDC */
2257     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
2258     /* set ksp_D into pcis data */
2259     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
2260     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
2261     pcis->ksp_D = pcbddc->ksp_D;
2262   }
2263 
2264   /* NEUMANN PROBLEM */
2265   A_RR = 0;
2266   if (neumann) {
2267     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2268     PetscInt        ibs,mbs;
2269     PetscBool       issbaij;
2270     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
2271     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
2272     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
2273     if (pcbddc->ksp_R) { /* already created ksp */
2274       PetscInt nn_R;
2275       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
2276       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2277       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
2278       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
2279         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
2280         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2281         reuse = MAT_INITIAL_MATRIX;
2282       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
2283         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
2284           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2285           reuse = MAT_INITIAL_MATRIX;
2286         } else { /* safe to reuse the matrix */
2287           reuse = MAT_REUSE_MATRIX;
2288         }
2289       }
2290       /* last check */
2291       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
2292         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2293         reuse = MAT_INITIAL_MATRIX;
2294       }
2295     } else { /* first time, so we need to create the matrix */
2296       reuse = MAT_INITIAL_MATRIX;
2297     }
2298     /* extract A_RR */
2299     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
2300     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
2301     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2302     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
2303       if (matis->A == pcbddc->local_mat) {
2304         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2305         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2306       } else {
2307         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2308       }
2309     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
2310       if (matis->A == pcbddc->local_mat) {
2311         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2312         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2313       } else {
2314         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2315       }
2316     }
2317     if (!sub_schurs->reuse_mumps) {
2318       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
2319       if (pcbddc->local_mat->symmetric_set) {
2320         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2321       }
2322     } else {
2323       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2324 
2325       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2326       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
2327       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2328     }
2329     if (!pcbddc->ksp_R) { /* create object if not present */
2330       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
2331       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
2332       /* default */
2333       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
2334       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
2335       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2336       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2337       if (issbaij) {
2338         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2339       } else {
2340         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2341       }
2342       /* Allow user's customization */
2343       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
2344       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2345     }
2346     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2347     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2348     if (!n_R) {
2349       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2350       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2351     }
2352     /* Reuse MUMPS solver if it is present */
2353     if (sub_schurs->reuse_mumps) {
2354       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2355 
2356       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2357     }
2358     /* Set Up KSP for Neumann problem of BDDC */
2359     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2360   }
2361   /* free Neumann problem's matrix */
2362   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2363 
2364   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2365   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2366     if (pcbddc->dbg_flag) {
2367       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2368       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2369       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2370     }
2371     if (dirichlet) { /* Dirichlet */
2372       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2373       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2374       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2375       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2376       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2377       /* need to be adapted? */
2378       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2379       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2380       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2381       /* print info */
2382       if (pcbddc->dbg_flag) {
2383         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2384         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2385       }
2386       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2387         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2388       }
2389     }
2390     if (neumann) { /* Neumann */
2391       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2392       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2393       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2394       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2395       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2396       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2397       /* need to be adapted? */
2398       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2399       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2400       /* print info */
2401       if (pcbddc->dbg_flag) {
2402         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2403         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2404       }
2405       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2406         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2407       }
2408     }
2409   }
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2415 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2416 {
2417   PetscErrorCode  ierr;
2418   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2419   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2420 
2421   PetscFunctionBegin;
2422   if (!sub_schurs->reuse_mumps) {
2423     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2424   }
2425   if (!pcbddc->switch_static) {
2426     if (applytranspose && pcbddc->local_auxmat1) {
2427       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2428       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2429     }
2430     if (!sub_schurs->reuse_mumps) {
2431       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2432       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2433     } else {
2434       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2435 
2436       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2437       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2438     }
2439   } else {
2440     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2441     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2442     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2443     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2444     if (applytranspose && pcbddc->local_auxmat1) {
2445       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2446       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2447       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2448       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2449     }
2450   }
2451   if (!sub_schurs->reuse_mumps) {
2452     if (applytranspose) {
2453       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2454     } else {
2455       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2456     }
2457 #if defined(PETSC_HAVE_MUMPS)
2458   } else {
2459     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2460 
2461     if (applytranspose) {
2462       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2463     } else {
2464       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2465     }
2466 #endif
2467   }
2468   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2469   if (!pcbddc->switch_static) {
2470     if (!sub_schurs->reuse_mumps) {
2471       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2472       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2473     } else {
2474       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2475 
2476       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2477       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2478     }
2479     if (!applytranspose && pcbddc->local_auxmat1) {
2480       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2481       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2482     }
2483   } else {
2484     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2485     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2486     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2487     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2488     if (!applytranspose && pcbddc->local_auxmat1) {
2489       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2490       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2491     }
2492     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2493     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2494     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2495     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2496   }
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2501 #undef __FUNCT__
2502 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2503 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2504 {
2505   PetscErrorCode ierr;
2506   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2507   PC_IS*            pcis = (PC_IS*)  (pc->data);
2508   const PetscScalar zero = 0.0;
2509 
2510   PetscFunctionBegin;
2511   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2512   if (applytranspose) {
2513     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2514     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2515   } else {
2516     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2517     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2518   }
2519 
2520   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2521   if (pcbddc->benign_n) {
2522     PetscScalar *array;
2523     PetscInt    j;
2524 
2525     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2526     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
2527     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2528   }
2529 
2530   /* start communications from local primal nodes to rhs of coarse solver */
2531   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2532   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2533   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2534 
2535   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2536   /* TODO remove null space when doing multilevel */
2537   if (pcbddc->coarse_ksp) {
2538     Mat coarse_mat;
2539     Vec rhs,sol;
2540     MatNullSpace nullsp;
2541 
2542     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2543     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2544     ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
2545     ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr);
2546     if (nullsp) {
2547       ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr);
2548     }
2549     if (applytranspose) {
2550       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2551     } else {
2552       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2553     }
2554     if (nullsp) {
2555       ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr);
2556     }
2557   }
2558 
2559   /* Local solution on R nodes */
2560   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2561     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2562   }
2563 
2564   /* communications from coarse sol to local primal nodes */
2565   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2566   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2567 
2568   /* Sum contributions from two levels */
2569   if (applytranspose) {
2570     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2571     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2572   } else {
2573     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2574     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2575   }
2576   /* store p0 */
2577   if (pcbddc->benign_n) {
2578     PetscScalar *array;
2579     PetscInt    j;
2580 
2581     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2582     for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
2583     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2584   }
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 #undef __FUNCT__
2589 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2590 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2591 {
2592   PetscErrorCode ierr;
2593   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2594   PetscScalar    *array;
2595   Vec            from,to;
2596 
2597   PetscFunctionBegin;
2598   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2599     from = pcbddc->coarse_vec;
2600     to = pcbddc->vec1_P;
2601     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2602       Vec tvec;
2603 
2604       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2605       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2606       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2607       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2608       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2609       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2610     }
2611   } else { /* from local to global -> put data in coarse right hand side */
2612     from = pcbddc->vec1_P;
2613     to = pcbddc->coarse_vec;
2614   }
2615   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 #undef __FUNCT__
2620 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2621 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2622 {
2623   PetscErrorCode ierr;
2624   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2625   PetscScalar    *array;
2626   Vec            from,to;
2627 
2628   PetscFunctionBegin;
2629   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2630     from = pcbddc->coarse_vec;
2631     to = pcbddc->vec1_P;
2632   } else { /* from local to global -> put data in coarse right hand side */
2633     from = pcbddc->vec1_P;
2634     to = pcbddc->coarse_vec;
2635   }
2636   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2637   if (smode == SCATTER_FORWARD) {
2638     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2639       Vec tvec;
2640 
2641       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2642       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2643       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2644       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2645     }
2646   } else {
2647     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2648      ierr = VecResetArray(from);CHKERRQ(ierr);
2649     }
2650   }
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 /* uncomment for testing purposes */
2655 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2656 #undef __FUNCT__
2657 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2658 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2659 {
2660   PetscErrorCode    ierr;
2661   PC_IS*            pcis = (PC_IS*)(pc->data);
2662   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2663   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2664   /* one and zero */
2665   PetscScalar       one=1.0,zero=0.0;
2666   /* space to store constraints and their local indices */
2667   PetscScalar       *constraints_data;
2668   PetscInt          *constraints_idxs,*constraints_idxs_B;
2669   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2670   PetscInt          *constraints_n;
2671   /* iterators */
2672   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2673   /* BLAS integers */
2674   PetscBLASInt      lwork,lierr;
2675   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2676   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2677   /* reuse */
2678   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2679   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2680   /* change of basis */
2681   PetscBool         qr_needed;
2682   PetscBT           change_basis,qr_needed_idx;
2683   /* auxiliary stuff */
2684   PetscInt          *nnz,*is_indices;
2685   PetscInt          ncc;
2686   /* some quantities */
2687   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2688   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2689 
2690   PetscFunctionBegin;
2691   /* Destroy Mat objects computed previously */
2692   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2693   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2694   /* save info on constraints from previous setup (if any) */
2695   olocal_primal_size = pcbddc->local_primal_size;
2696   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2697   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2698   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2699   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2700   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2701   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2702 
2703   /* print some info */
2704   if (pcbddc->dbg_flag) {
2705     IS       vertices;
2706     PetscInt nv,nedges,nfaces;
2707     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2708     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2709     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2710     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2711     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2712     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2713     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2714     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2715     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2716   }
2717 
2718   if (!pcbddc->adaptive_selection) {
2719     IS           ISForVertices,*ISForFaces,*ISForEdges;
2720     MatNullSpace nearnullsp;
2721     const Vec    *nearnullvecs;
2722     Vec          *localnearnullsp;
2723     PetscScalar  *array;
2724     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2725     PetscBool    nnsp_has_cnst;
2726     /* LAPACK working arrays for SVD or POD */
2727     PetscBool    skip_lapack,boolforchange;
2728     PetscScalar  *work;
2729     PetscReal    *singular_vals;
2730 #if defined(PETSC_USE_COMPLEX)
2731     PetscReal    *rwork;
2732 #endif
2733 #if defined(PETSC_MISSING_LAPACK_GESVD)
2734     PetscScalar  *temp_basis,*correlation_mat;
2735 #else
2736     PetscBLASInt dummy_int=1;
2737     PetscScalar  dummy_scalar=1.;
2738 #endif
2739 
2740     /* Get index sets for faces, edges and vertices from graph */
2741     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2742     /* free unneeded index sets */
2743     if (!pcbddc->use_vertices) {
2744       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2745     }
2746     if (!pcbddc->use_edges) {
2747       for (i=0;i<n_ISForEdges;i++) {
2748         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2749       }
2750       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2751       n_ISForEdges = 0;
2752     }
2753     if (!pcbddc->use_faces) {
2754       for (i=0;i<n_ISForFaces;i++) {
2755         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2756       }
2757       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2758       n_ISForFaces = 0;
2759     }
2760 
2761 #if defined(PETSC_USE_DEBUG)
2762     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2763        Also use_change_of_basis should be consistent among processors */
2764     if (pcbddc->NullSpace) {
2765       PetscBool tbool[2],gbool[2];
2766 
2767       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2768         pcbddc->use_change_of_basis = PETSC_TRUE;
2769         if (!ISForEdges) {
2770           pcbddc->use_change_on_faces = PETSC_TRUE;
2771         }
2772       }
2773       tbool[0] = pcbddc->use_change_of_basis;
2774       tbool[1] = pcbddc->use_change_on_faces;
2775       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2776       pcbddc->use_change_of_basis = gbool[0];
2777       pcbddc->use_change_on_faces = gbool[1];
2778     }
2779 #endif
2780 
2781     /* check if near null space is attached to global mat */
2782     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2783     if (nearnullsp) {
2784       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2785       /* remove any stored info */
2786       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2787       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2788       /* store information for BDDC solver reuse */
2789       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2790       pcbddc->onearnullspace = nearnullsp;
2791       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2792       for (i=0;i<nnsp_size;i++) {
2793         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2794       }
2795     } else { /* if near null space is not provided BDDC uses constants by default */
2796       nnsp_size = 0;
2797       nnsp_has_cnst = PETSC_TRUE;
2798     }
2799     /* get max number of constraints on a single cc */
2800     max_constraints = nnsp_size;
2801     if (nnsp_has_cnst) max_constraints++;
2802 
2803     /*
2804          Evaluate maximum storage size needed by the procedure
2805          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2806          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2807          There can be multiple constraints per connected component
2808                                                                                                                                                            */
2809     n_vertices = 0;
2810     if (ISForVertices) {
2811       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2812     }
2813     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2814     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2815 
2816     total_counts = n_ISForFaces+n_ISForEdges;
2817     total_counts *= max_constraints;
2818     total_counts += n_vertices;
2819     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2820 
2821     total_counts = 0;
2822     max_size_of_constraint = 0;
2823     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2824       IS used_is;
2825       if (i<n_ISForEdges) {
2826         used_is = ISForEdges[i];
2827       } else {
2828         used_is = ISForFaces[i-n_ISForEdges];
2829       }
2830       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2831       total_counts += j;
2832       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2833     }
2834     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2835 
2836     /* get local part of global near null space vectors */
2837     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2838     for (k=0;k<nnsp_size;k++) {
2839       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2840       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2841       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2842     }
2843 
2844     /* whether or not to skip lapack calls */
2845     skip_lapack = PETSC_TRUE;
2846     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2847 
2848     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2849     if (!skip_lapack) {
2850       PetscScalar temp_work;
2851 
2852 #if defined(PETSC_MISSING_LAPACK_GESVD)
2853       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2854       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2855       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2856       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2857 #if defined(PETSC_USE_COMPLEX)
2858       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2859 #endif
2860       /* now we evaluate the optimal workspace using query with lwork=-1 */
2861       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2862       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2863       lwork = -1;
2864       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2865 #if !defined(PETSC_USE_COMPLEX)
2866       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2867 #else
2868       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2869 #endif
2870       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2871       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2872 #else /* on missing GESVD */
2873       /* SVD */
2874       PetscInt max_n,min_n;
2875       max_n = max_size_of_constraint;
2876       min_n = max_constraints;
2877       if (max_size_of_constraint < max_constraints) {
2878         min_n = max_size_of_constraint;
2879         max_n = max_constraints;
2880       }
2881       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2882 #if defined(PETSC_USE_COMPLEX)
2883       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2884 #endif
2885       /* now we evaluate the optimal workspace using query with lwork=-1 */
2886       lwork = -1;
2887       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2888       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2889       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2890       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2891 #if !defined(PETSC_USE_COMPLEX)
2892       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2893 #else
2894       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2895 #endif
2896       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2897       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2898 #endif /* on missing GESVD */
2899       /* Allocate optimal workspace */
2900       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2901       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2902     }
2903     /* Now we can loop on constraining sets */
2904     total_counts = 0;
2905     constraints_idxs_ptr[0] = 0;
2906     constraints_data_ptr[0] = 0;
2907     /* vertices */
2908     if (n_vertices) {
2909       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2910       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2911       for (i=0;i<n_vertices;i++) {
2912         constraints_n[total_counts] = 1;
2913         constraints_data[total_counts] = 1.0;
2914         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2915         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2916         total_counts++;
2917       }
2918       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2919       n_vertices = total_counts;
2920     }
2921 
2922     /* edges and faces */
2923     total_counts_cc = total_counts;
2924     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2925       IS        used_is;
2926       PetscBool idxs_copied = PETSC_FALSE;
2927 
2928       if (ncc<n_ISForEdges) {
2929         used_is = ISForEdges[ncc];
2930         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2931       } else {
2932         used_is = ISForFaces[ncc-n_ISForEdges];
2933         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2934       }
2935       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2936 
2937       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2938       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2939       /* change of basis should not be performed on local periodic nodes */
2940       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2941       if (nnsp_has_cnst) {
2942         PetscScalar quad_value;
2943 
2944         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2945         idxs_copied = PETSC_TRUE;
2946 
2947         if (!pcbddc->use_nnsp_true) {
2948           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2949         } else {
2950           quad_value = 1.0;
2951         }
2952         for (j=0;j<size_of_constraint;j++) {
2953           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2954         }
2955         temp_constraints++;
2956         total_counts++;
2957       }
2958       for (k=0;k<nnsp_size;k++) {
2959         PetscReal real_value;
2960         PetscScalar *ptr_to_data;
2961 
2962         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2963         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2964         for (j=0;j<size_of_constraint;j++) {
2965           ptr_to_data[j] = array[is_indices[j]];
2966         }
2967         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2968         /* check if array is null on the connected component */
2969         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2970         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2971         if (real_value > 0.0) { /* keep indices and values */
2972           temp_constraints++;
2973           total_counts++;
2974           if (!idxs_copied) {
2975             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2976             idxs_copied = PETSC_TRUE;
2977           }
2978         }
2979       }
2980       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2981       valid_constraints = temp_constraints;
2982       if (!pcbddc->use_nnsp_true && temp_constraints) {
2983         if (temp_constraints == 1) { /* just normalize the constraint */
2984           PetscScalar norm,*ptr_to_data;
2985 
2986           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2987           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2988           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2989           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2990           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2991         } else { /* perform SVD */
2992           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2993           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2994 
2995 #if defined(PETSC_MISSING_LAPACK_GESVD)
2996           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2997              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2998              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2999                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
3000                 from that computed using LAPACKgesvd
3001              -> This is due to a different computation of eigenvectors in LAPACKheev
3002              -> The quality of the POD-computed basis will be the same */
3003           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
3004           /* Store upper triangular part of correlation matrix */
3005           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3006           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3007           for (j=0;j<temp_constraints;j++) {
3008             for (k=0;k<j+1;k++) {
3009               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
3010             }
3011           }
3012           /* compute eigenvalues and eigenvectors of correlation matrix */
3013           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3014           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
3015 #if !defined(PETSC_USE_COMPLEX)
3016           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
3017 #else
3018           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
3019 #endif
3020           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3021           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
3022           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
3023           j = 0;
3024           while (j < temp_constraints && singular_vals[j] < tol) j++;
3025           total_counts = total_counts-j;
3026           valid_constraints = temp_constraints-j;
3027           /* scale and copy POD basis into used quadrature memory */
3028           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3029           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3030           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
3031           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3032           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
3033           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3034           if (j<temp_constraints) {
3035             PetscInt ii;
3036             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
3037             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3038             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
3039             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3040             for (k=0;k<temp_constraints-j;k++) {
3041               for (ii=0;ii<size_of_constraint;ii++) {
3042                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
3043               }
3044             }
3045           }
3046 #else  /* on missing GESVD */
3047           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3048           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3049           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3050           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3051 #if !defined(PETSC_USE_COMPLEX)
3052           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
3053 #else
3054           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
3055 #endif
3056           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
3057           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3058           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
3059           k = temp_constraints;
3060           if (k > size_of_constraint) k = size_of_constraint;
3061           j = 0;
3062           while (j < k && singular_vals[k-j-1] < tol) j++;
3063           valid_constraints = k-j;
3064           total_counts = total_counts-temp_constraints+valid_constraints;
3065 #endif /* on missing GESVD */
3066         }
3067       }
3068       /* update pointers information */
3069       if (valid_constraints) {
3070         constraints_n[total_counts_cc] = valid_constraints;
3071         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
3072         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
3073         /* set change_of_basis flag */
3074         if (boolforchange) {
3075           PetscBTSet(change_basis,total_counts_cc);
3076         }
3077         total_counts_cc++;
3078       }
3079     }
3080     /* free workspace */
3081     if (!skip_lapack) {
3082       ierr = PetscFree(work);CHKERRQ(ierr);
3083 #if defined(PETSC_USE_COMPLEX)
3084       ierr = PetscFree(rwork);CHKERRQ(ierr);
3085 #endif
3086       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
3087 #if defined(PETSC_MISSING_LAPACK_GESVD)
3088       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
3089       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
3090 #endif
3091     }
3092     for (k=0;k<nnsp_size;k++) {
3093       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
3094     }
3095     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
3096     /* free index sets of faces, edges and vertices */
3097     for (i=0;i<n_ISForFaces;i++) {
3098       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
3099     }
3100     if (n_ISForFaces) {
3101       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
3102     }
3103     for (i=0;i<n_ISForEdges;i++) {
3104       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
3105     }
3106     if (n_ISForEdges) {
3107       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
3108     }
3109     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
3110   } else {
3111     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3112 
3113     total_counts = 0;
3114     n_vertices = 0;
3115     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3116       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
3117     }
3118     max_constraints = 0;
3119     total_counts_cc = 0;
3120     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3121       total_counts += pcbddc->adaptive_constraints_n[i];
3122       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
3123       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
3124     }
3125     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
3126     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
3127     constraints_idxs = pcbddc->adaptive_constraints_idxs;
3128     constraints_data = pcbddc->adaptive_constraints_data;
3129     /* constraints_n differs from pcbddc->adaptive_constraints_n */
3130     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
3131     total_counts_cc = 0;
3132     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3133       if (pcbddc->adaptive_constraints_n[i]) {
3134         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
3135       }
3136     }
3137 #if 0
3138     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
3139     for (i=0;i<total_counts_cc;i++) {
3140       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
3141       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
3142       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
3143         printf(" %d",constraints_idxs[j]);
3144       }
3145       printf("\n");
3146       printf("number of cc: %d\n",constraints_n[i]);
3147     }
3148     for (i=0;i<n_vertices;i++) {
3149       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
3150     }
3151     for (i=0;i<sub_schurs->n_subs;i++) {
3152       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
3153     }
3154 #endif
3155 
3156     max_size_of_constraint = 0;
3157     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
3158     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
3159     /* Change of basis */
3160     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
3161     if (pcbddc->use_change_of_basis) {
3162       for (i=0;i<sub_schurs->n_subs;i++) {
3163         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
3164           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
3165         }
3166       }
3167     }
3168   }
3169   pcbddc->local_primal_size = total_counts;
3170   ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3171 
3172   /* map constraints_idxs in boundary numbering */
3173   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
3174   if (i != constraints_idxs_ptr[total_counts_cc]) {
3175     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
3176   }
3177 
3178   /* Create constraint matrix */
3179   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
3180   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
3181   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
3182 
3183   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
3184   /* determine if a QR strategy is needed for change of basis */
3185   qr_needed = PETSC_FALSE;
3186   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
3187   total_primal_vertices=0;
3188   pcbddc->local_primal_size_cc = 0;
3189   for (i=0;i<total_counts_cc;i++) {
3190     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3191     if (size_of_constraint == 1) {
3192       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
3193       pcbddc->local_primal_size_cc += 1;
3194     } else if (PetscBTLookup(change_basis,i)) {
3195       for (k=0;k<constraints_n[i];k++) {
3196         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3197       }
3198       pcbddc->local_primal_size_cc += constraints_n[i];
3199       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
3200         PetscBTSet(qr_needed_idx,i);
3201         qr_needed = PETSC_TRUE;
3202       }
3203     } else {
3204       pcbddc->local_primal_size_cc += 1;
3205     }
3206   }
3207   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
3208   pcbddc->n_vertices = total_primal_vertices;
3209   /* permute indices in order to have a sorted set of vertices */
3210   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3211 
3212   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
3213   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
3214   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
3215 
3216   /* nonzero structure of constraint matrix */
3217   /* and get reference dof for local constraints */
3218   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
3219   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
3220 
3221   j = total_primal_vertices;
3222   total_counts = total_primal_vertices;
3223   cum = total_primal_vertices;
3224   for (i=n_vertices;i<total_counts_cc;i++) {
3225     if (!PetscBTLookup(change_basis,i)) {
3226       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
3227       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
3228       cum++;
3229       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3230       for (k=0;k<constraints_n[i];k++) {
3231         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3232         nnz[j+k] = size_of_constraint;
3233       }
3234       j += constraints_n[i];
3235     }
3236   }
3237   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
3238   ierr = PetscFree(nnz);CHKERRQ(ierr);
3239 
3240   /* set values in constraint matrix */
3241   for (i=0;i<total_primal_vertices;i++) {
3242     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3243   }
3244   total_counts = total_primal_vertices;
3245   for (i=n_vertices;i<total_counts_cc;i++) {
3246     if (!PetscBTLookup(change_basis,i)) {
3247       PetscInt *cols;
3248 
3249       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3250       cols = constraints_idxs+constraints_idxs_ptr[i];
3251       for (k=0;k<constraints_n[i];k++) {
3252         PetscInt    row = total_counts+k;
3253         PetscScalar *vals;
3254 
3255         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
3256         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3257       }
3258       total_counts += constraints_n[i];
3259     }
3260   }
3261   /* assembling */
3262   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3263   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264 
3265   /*
3266   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3267   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
3268   */
3269   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
3270   if (pcbddc->use_change_of_basis) {
3271     /* dual and primal dofs on a single cc */
3272     PetscInt     dual_dofs,primal_dofs;
3273     /* working stuff for GEQRF */
3274     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
3275     PetscBLASInt lqr_work;
3276     /* working stuff for UNGQR */
3277     PetscScalar  *gqr_work,lgqr_work_t;
3278     PetscBLASInt lgqr_work;
3279     /* working stuff for TRTRS */
3280     PetscScalar  *trs_rhs;
3281     PetscBLASInt Blas_NRHS;
3282     /* pointers for values insertion into change of basis matrix */
3283     PetscInt     *start_rows,*start_cols;
3284     PetscScalar  *start_vals;
3285     /* working stuff for values insertion */
3286     PetscBT      is_primal;
3287     PetscInt     *aux_primal_numbering_B;
3288     /* matrix sizes */
3289     PetscInt     global_size,local_size;
3290     /* temporary change of basis */
3291     Mat          localChangeOfBasisMatrix;
3292     /* extra space for debugging */
3293     PetscScalar  *dbg_work;
3294 
3295     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
3296     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
3297     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3298     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
3299     /* nonzeros for local mat */
3300     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
3301     for (i=0;i<pcis->n;i++) nnz[i]=1;
3302     for (i=n_vertices;i<total_counts_cc;i++) {
3303       if (PetscBTLookup(change_basis,i)) {
3304         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3305         if (PetscBTLookup(qr_needed_idx,i)) {
3306           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
3307         } else {
3308           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
3309           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
3310         }
3311       }
3312     }
3313     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
3314     ierr = PetscFree(nnz);CHKERRQ(ierr);
3315     /* Set initial identity in the matrix */
3316     for (i=0;i<pcis->n;i++) {
3317       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
3318     }
3319 
3320     if (pcbddc->dbg_flag) {
3321       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
3322       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3323     }
3324 
3325 
3326     /* Now we loop on the constraints which need a change of basis */
3327     /*
3328        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
3329        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
3330 
3331        Basic blocks of change of basis matrix T computed by
3332 
3333           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
3334 
3335             | 1        0   ...        0         s_1/S |
3336             | 0        1   ...        0         s_2/S |
3337             |              ...                        |
3338             | 0        ...            1     s_{n-1}/S |
3339             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
3340 
3341             with S = \sum_{i=1}^n s_i^2
3342             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
3343                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
3344 
3345           - QR decomposition of constraints otherwise
3346     */
3347     if (qr_needed) {
3348       /* space to store Q */
3349       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
3350       /* first we issue queries for optimal work */
3351       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3352       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
3353       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3354       lqr_work = -1;
3355       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
3356       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3357       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3358       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3359       lgqr_work = -1;
3360       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3361       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3362       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3363       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3364       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3365       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3366       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3367       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3368       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3369       /* array to store scaling factors for reflectors */
3370       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3371       /* array to store rhs and solution of triangular solver */
3372       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3373       /* allocating workspace for check */
3374       if (pcbddc->dbg_flag) {
3375         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3376       }
3377     }
3378     /* array to store whether a node is primal or not */
3379     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3380     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3381     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3382     if (i != total_primal_vertices) {
3383       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3384     }
3385     for (i=0;i<total_primal_vertices;i++) {
3386       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3387     }
3388     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3389 
3390     /* loop on constraints and see whether or not they need a change of basis and compute it */
3391     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3392       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3393       if (PetscBTLookup(change_basis,total_counts)) {
3394         /* get constraint info */
3395         primal_dofs = constraints_n[total_counts];
3396         dual_dofs = size_of_constraint-primal_dofs;
3397 
3398         if (pcbddc->dbg_flag) {
3399           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3400         }
3401 
3402         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3403 
3404           /* copy quadrature constraints for change of basis check */
3405           if (pcbddc->dbg_flag) {
3406             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3407           }
3408           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3409           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3410 
3411           /* compute QR decomposition of constraints */
3412           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3413           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3414           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3415           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3416           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3417           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3418           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3419 
3420           /* explictly compute R^-T */
3421           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3422           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3423           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3424           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3425           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3426           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3427           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3428           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3429           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3430           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3431 
3432           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3433           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3434           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3435           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3436           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3437           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3438           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3439           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3440           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3441 
3442           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3443              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3444              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3445           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3446           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3447           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3448           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3449           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3450           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3451           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3452           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3453           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3454           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3455 
3456           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3457           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3458           /* insert cols for primal dofs */
3459           for (j=0;j<primal_dofs;j++) {
3460             start_vals = &qr_basis[j*size_of_constraint];
3461             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3462             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3463           }
3464           /* insert cols for dual dofs */
3465           for (j=0,k=0;j<dual_dofs;k++) {
3466             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3467               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3468               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3469               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3470               j++;
3471             }
3472           }
3473 
3474           /* check change of basis */
3475           if (pcbddc->dbg_flag) {
3476             PetscInt   ii,jj;
3477             PetscBool valid_qr=PETSC_TRUE;
3478             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3479             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3480             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3481             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3482             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3483             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3484             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3485             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3486             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3487             for (jj=0;jj<size_of_constraint;jj++) {
3488               for (ii=0;ii<primal_dofs;ii++) {
3489                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3490                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3491               }
3492             }
3493             if (!valid_qr) {
3494               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3495               for (jj=0;jj<size_of_constraint;jj++) {
3496                 for (ii=0;ii<primal_dofs;ii++) {
3497                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3498                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3499                   }
3500                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3501                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3502                   }
3503                 }
3504               }
3505             } else {
3506               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3507             }
3508           }
3509         } else { /* simple transformation block */
3510           PetscInt    row,col;
3511           PetscScalar val,norm;
3512 
3513           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3514           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3515           for (j=0;j<size_of_constraint;j++) {
3516             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3517             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3518             if (!PetscBTLookup(is_primal,row_B)) {
3519               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3520               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3521               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3522             } else {
3523               for (k=0;k<size_of_constraint;k++) {
3524                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3525                 if (row != col) {
3526                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3527                 } else {
3528                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3529                 }
3530                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3531               }
3532             }
3533           }
3534           if (pcbddc->dbg_flag) {
3535             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3536           }
3537         }
3538       } else {
3539         if (pcbddc->dbg_flag) {
3540           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3541         }
3542       }
3543     }
3544 
3545     /* free workspace */
3546     if (qr_needed) {
3547       if (pcbddc->dbg_flag) {
3548         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3549       }
3550       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3551       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3552       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3553       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3554       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3555     }
3556     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3557     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3558     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3559 
3560     /* assembling of global change of variable */
3561     {
3562       Mat      tmat;
3563       PetscInt bs;
3564 
3565       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3566       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3567       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3568       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3569       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3570       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3571       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3572       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3573       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3574       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3575       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3576       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3577       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3578       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3579       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3580       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3581       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3582       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3583     }
3584     /* check */
3585     if (pcbddc->dbg_flag) {
3586       PetscReal error;
3587       Vec       x,x_change;
3588 
3589       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3590       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3591       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3592       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3593       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3594       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3595       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3596       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3597       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3598       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3599       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3600       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3601       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3602       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3603       ierr = VecDestroy(&x);CHKERRQ(ierr);
3604       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3605     }
3606 
3607     /* adapt sub_schurs computed (if any) */
3608     if (pcbddc->use_deluxe_scaling) {
3609       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3610       if (sub_schurs->S_Ej_all) {
3611         Mat                    S_new,tmat;
3612         ISLocalToGlobalMapping NtoSall;
3613         IS                     is_all_N,is_V,is_V_Sall;
3614         const PetscScalar      *array;
3615         const PetscInt         *idxs_V,*idxs_all;
3616         PetscInt               i,n_V;
3617 
3618         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3619         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3620         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3621         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3622         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3623         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3624         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3625         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3626         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3627         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3628         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3629         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3630         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3631         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3632         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3633         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3634         for (i=0;i<n_V;i++) {
3635           PetscScalar val;
3636           PetscInt    idx;
3637 
3638           idx = idxs_V[i];
3639           val = array[idxs_all[idxs_V[i]]];
3640           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3641         }
3642         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3643         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3644         sub_schurs->S_Ej_all = S_new;
3645         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3646         if (sub_schurs->sum_S_Ej_all) {
3647           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3648           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3649           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3650           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3651           sub_schurs->sum_S_Ej_all = S_new;
3652           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3653         }
3654         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3655         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3656         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3657         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3658         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3659       }
3660     }
3661     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3662   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3663     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3664     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3665   }
3666 
3667   /* set up change of basis context */
3668   if (pcbddc->ChangeOfBasisMatrix) {
3669     PCBDDCChange_ctx change_ctx;
3670 
3671     if (!pcbddc->new_global_mat) {
3672       PetscInt global_size,local_size;
3673 
3674       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3675       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3676       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3677       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3678       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3679       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3680       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3681       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3682       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3683     } else {
3684       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3685       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3686       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3687     }
3688     if (!pcbddc->user_ChangeOfBasisMatrix) {
3689       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3690       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3691     } else {
3692       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3693       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3694     }
3695     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3696     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3697     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3698     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3699   }
3700 
3701   /* add pressure dofs to set of primal nodes for numbering purposes */
3702   for (i=0;i<pcbddc->benign_n;i++) {
3703     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
3704     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
3705     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3706     pcbddc->local_primal_size_cc++;
3707     pcbddc->local_primal_size++;
3708   }
3709 
3710   /* check if a new primal space has been introduced (also take into account benign trick) */
3711   pcbddc->new_primal_space_local = PETSC_TRUE;
3712   if (olocal_primal_size == pcbddc->local_primal_size) {
3713     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3714     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3715     if (!pcbddc->new_primal_space_local) {
3716       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3717       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3718     }
3719   }
3720   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3721   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3722   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3723 
3724   /* flush dbg viewer */
3725   if (pcbddc->dbg_flag) {
3726     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3727   }
3728 
3729   /* free workspace */
3730   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3731   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3732   if (!pcbddc->adaptive_selection) {
3733     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3734     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3735   } else {
3736     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3737                       pcbddc->adaptive_constraints_idxs_ptr,
3738                       pcbddc->adaptive_constraints_data_ptr,
3739                       pcbddc->adaptive_constraints_idxs,
3740                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3741     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3742     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3743   }
3744   PetscFunctionReturn(0);
3745 }
3746 
3747 #undef __FUNCT__
3748 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3749 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3750 {
3751   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3752   PC_IS       *pcis = (PC_IS*)pc->data;
3753   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3754   PetscInt    ierr,i,vertex_size,N;
3755   PetscViewer viewer=pcbddc->dbg_viewer;
3756 
3757   PetscFunctionBegin;
3758   /* Reset previously computed graph */
3759   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3760   /* Init local Graph struct */
3761   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3762   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3763 
3764   /* Check validity of the csr graph passed in by the user */
3765   if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3766     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
3767   }
3768 
3769   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3770   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3771     PetscInt  *xadj,*adjncy;
3772     PetscInt  nvtxs;
3773     PetscBool flg_row=PETSC_FALSE;
3774 
3775     if (pcbddc->use_local_adj) {
3776 
3777       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3778       if (flg_row) {
3779         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3780         pcbddc->computed_rowadj = PETSC_TRUE;
3781       }
3782       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3783     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3784       IS                     is_dummy;
3785       ISLocalToGlobalMapping l2gmap_dummy;
3786       PetscInt               j,sum;
3787       PetscInt               *cxadj,*cadjncy;
3788       const PetscInt         *idxs;
3789       PCBDDCGraph            graph;
3790       PetscBT                is_on_boundary;
3791 
3792       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3793       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3794       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3795       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3796       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3797       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3798       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3799       if (flg_row) {
3800         graph->xadj = xadj;
3801         graph->adjncy = adjncy;
3802       }
3803       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3804       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3805       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3806 
3807       if (pcbddc->dbg_flag) {
3808         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3809         for (i=0;i<graph->ncc;i++) {
3810           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3811         }
3812       }
3813 
3814       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3815       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3816       for (i=0;i<pcis->n_B;i++) {
3817         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3818       }
3819       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3820 
3821       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3822       sum = 0;
3823       for (i=0;i<graph->ncc;i++) {
3824         PetscInt sizecc = 0;
3825         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3826           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3827             sizecc++;
3828           }
3829         }
3830         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3831           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3832             cxadj[graph->queue[j]] = sizecc;
3833           }
3834         }
3835         sum += sizecc*sizecc;
3836       }
3837       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3838       sum = 0;
3839       for (i=0;i<pcis->n;i++) {
3840         PetscInt temp = cxadj[i];
3841         cxadj[i] = sum;
3842         sum += temp;
3843       }
3844       cxadj[pcis->n] = sum;
3845       for (i=0;i<graph->ncc;i++) {
3846         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3847           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3848             PetscInt k,sizecc = 0;
3849             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3850               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3851                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3852                 sizecc++;
3853               }
3854             }
3855           }
3856         }
3857       }
3858       if (sum) {
3859         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3860       } else {
3861         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3862         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3863       }
3864       graph->xadj = 0;
3865       graph->adjncy = 0;
3866       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3867       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3868     }
3869   }
3870   if (pcbddc->dbg_flag) {
3871     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3872   }
3873 
3874   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3875   vertex_size = 1;
3876   if (pcbddc->user_provided_isfordofs) {
3877     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3878       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3879       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3880         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3881         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3882       }
3883       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3884       pcbddc->n_ISForDofs = 0;
3885       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3886     }
3887     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3888     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3889   } else {
3890     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3891       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3892       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3893       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3894         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3895       }
3896     }
3897   }
3898 
3899   /* Setup of Graph */
3900   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3901     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3902   }
3903   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3904     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3905   }
3906   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3907 
3908   /* attach info on disconnected subdomains if present */
3909   if (pcbddc->n_local_subs) {
3910     PetscInt *local_subs;
3911 
3912     ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr);
3913     for (i=0;i<pcbddc->n_local_subs;i++) {
3914       const PetscInt *idxs;
3915       PetscInt       nl,j;
3916 
3917       ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr);
3918       ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3919       for (j=0;j<nl;j++) {
3920         local_subs[idxs[j]] = i;
3921       }
3922       ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3923     }
3924     pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
3925     pcbddc->mat_graph->local_subs = local_subs;
3926   }
3927 
3928   /* Graph's connected components analysis */
3929   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3930 
3931   /* print some info to stdout */
3932   if (pcbddc->dbg_flag) {
3933     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3934   }
3935 
3936   /* mark topography has done */
3937   pcbddc->recompute_topography = PETSC_FALSE;
3938   PetscFunctionReturn(0);
3939 }
3940 
3941 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3942 #undef __FUNCT__
3943 #define __FUNCT__ "PCBDDCSubsetNumbering"
3944 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3945 {
3946   PetscSF        sf;
3947   PetscLayout    map;
3948   const PetscInt *idxs;
3949   PetscInt       *leaf_data,*root_data,*gidxs;
3950   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3951   PetscInt       n_n,nlocals,start,first_index;
3952   PetscMPIInt    commsize;
3953   PetscBool      first_found;
3954   PetscErrorCode ierr;
3955 
3956   PetscFunctionBegin;
3957   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3958   if (subset_mult) {
3959     PetscCheckSameComm(subset,1,subset_mult,2);
3960     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3961     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3962   }
3963   /* create workspace layout for computing global indices of subset */
3964   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3965   lbounds[0] = lbounds[1] = 0;
3966   for (i=0;i<n;i++) {
3967     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3968     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3969   }
3970   lbounds[0] = -lbounds[0];
3971   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3972   gbounds[0] = -gbounds[0];
3973   N = gbounds[1] - gbounds[0] + 1;
3974   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3975   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3976   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3977   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3978   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3979 
3980   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3981   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3982   if (subset_mult) {
3983     const PetscInt* idxs_mult;
3984 
3985     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3986     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3987     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3988   } else {
3989     for (i=0;i<n;i++) leaf_data[i] = 1;
3990   }
3991   /* local size of new subset */
3992   n_n = 0;
3993   for (i=0;i<n;i++) n_n += leaf_data[i];
3994 
3995   /* global indexes in layout */
3996   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3997   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3998   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3999   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
4000   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
4001   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
4002 
4003   /* reduce from leaves to roots */
4004   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
4005   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4006   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4007 
4008   /* count indexes in local part of layout */
4009   nlocals = 0;
4010   first_index = -1;
4011   first_found = PETSC_FALSE;
4012   for (i=0;i<Nl;i++) {
4013     if (!first_found && root_data[i]) {
4014       first_found = PETSC_TRUE;
4015       first_index = i;
4016     }
4017     nlocals += root_data[i];
4018   }
4019 
4020   /* cumulative of number of indexes and size of subset without holes */
4021 #if defined(PETSC_HAVE_MPI_EXSCAN)
4022   start = 0;
4023   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4024 #else
4025   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4026   start = start-nlocals;
4027 #endif
4028 
4029   if (N_n) { /* compute total size of new subset if requested */
4030     *N_n = start + nlocals;
4031     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
4032     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4033   }
4034 
4035   /* adapt root data with cumulative */
4036   if (first_found) {
4037     PetscInt old_index;
4038 
4039     root_data[first_index] += start;
4040     old_index = first_index;
4041     for (i=first_index+1;i<Nl;i++) {
4042       if (root_data[i]) {
4043         root_data[i] += root_data[old_index];
4044         old_index = i;
4045       }
4046     }
4047   }
4048 
4049   /* from roots to leaves */
4050   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4051   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4052   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
4053 
4054   /* create new IS with global indexes without holes */
4055   if (subset_mult) {
4056     const PetscInt* idxs_mult;
4057     PetscInt        cum;
4058 
4059     cum = 0;
4060     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4061     for (i=0;i<n;i++) {
4062       PetscInt j;
4063       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
4064     }
4065     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4066   } else {
4067     for (i=0;i<n;i++) {
4068       gidxs[i] = leaf_data[i]-1;
4069     }
4070   }
4071   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
4072   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
4073   PetscFunctionReturn(0);
4074 }
4075 
4076 #undef __FUNCT__
4077 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
4078 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
4079 {
4080   PetscInt       i,j;
4081   PetscScalar    *alphas;
4082   PetscErrorCode ierr;
4083 
4084   PetscFunctionBegin;
4085   /* this implements stabilized Gram-Schmidt */
4086   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
4087   for (i=0;i<n;i++) {
4088     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
4089     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
4090     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
4091   }
4092   ierr = PetscFree(alphas);CHKERRQ(ierr);
4093   PetscFunctionReturn(0);
4094 }
4095 
4096 #undef __FUNCT__
4097 #define __FUNCT__ "MatISGetSubassemblingPattern"
4098 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
4099 {
4100   IS             ranks_send_to;
4101   PetscInt       n_neighs,*neighs,*n_shared,**shared;
4102   PetscMPIInt    size,rank,color;
4103   PetscInt       *xadj,*adjncy;
4104   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
4105   PetscInt       i,local_size,threshold=0;
4106   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
4107   PetscSubcomm   subcomm;
4108   PetscErrorCode ierr;
4109 
4110   PetscFunctionBegin;
4111   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
4112   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
4113   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
4114 
4115   /* Get info on mapping */
4116   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
4117   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4118 
4119   /* build local CSR graph of subdomains' connectivity */
4120   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
4121   xadj[0] = 0;
4122   xadj[1] = PetscMax(n_neighs-1,0);
4123   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
4124   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
4125 
4126   if (threshold) {
4127     PetscInt xadj_count = 0;
4128     for (i=1;i<n_neighs;i++) {
4129       if (n_shared[i] > threshold) {
4130         adjncy[xadj_count] = neighs[i];
4131         adjncy_wgt[xadj_count] = n_shared[i];
4132         xadj_count++;
4133       }
4134     }
4135     xadj[1] = xadj_count;
4136   } else {
4137     if (xadj[1]) {
4138       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
4139       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
4140     }
4141   }
4142   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4143   if (use_square) {
4144     for (i=0;i<xadj[1];i++) {
4145       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
4146     }
4147   }
4148   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4149 
4150   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
4151 
4152   /*
4153     Restrict work on active processes only.
4154   */
4155   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
4156   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
4157   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
4158   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
4159   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4160   if (color) {
4161     ierr = PetscFree(xadj);CHKERRQ(ierr);
4162     ierr = PetscFree(adjncy);CHKERRQ(ierr);
4163     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4164   } else {
4165     Mat             subdomain_adj;
4166     IS              new_ranks,new_ranks_contig;
4167     MatPartitioning partitioner;
4168     PetscInt        prank,rstart=0,rend=0;
4169     PetscInt        *is_indices,*oldranks;
4170     PetscBool       aggregate;
4171 
4172     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
4173     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
4174     prank = rank;
4175     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
4176     /*
4177     for (i=0;i<size;i++) {
4178       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
4179     }
4180     */
4181     for (i=0;i<xadj[1];i++) {
4182       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
4183     }
4184     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4185     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
4186     if (aggregate) {
4187       PetscInt    lrows,row,ncols,*cols;
4188       PetscMPIInt nrank;
4189       PetscScalar *vals;
4190 
4191       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
4192       lrows = 0;
4193       if (nrank<redprocs) {
4194         lrows = size/redprocs;
4195         if (nrank<size%redprocs) lrows++;
4196       }
4197       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
4198       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
4199       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4200       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4201       row = nrank;
4202       ncols = xadj[1]-xadj[0];
4203       cols = adjncy;
4204       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
4205       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
4206       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4207       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4208       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4209       ierr = PetscFree(xadj);CHKERRQ(ierr);
4210       ierr = PetscFree(adjncy);CHKERRQ(ierr);
4211       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4212       ierr = PetscFree(vals);CHKERRQ(ierr);
4213     } else {
4214       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
4215     }
4216     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
4217 
4218     /* Partition */
4219     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
4220     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
4221     if (use_vwgt) {
4222       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
4223       v_wgt[0] = local_size;
4224       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
4225     }
4226     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
4227     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
4228     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
4229     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
4230     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
4231 
4232     /* renumber new_ranks to avoid "holes" in new set of processors */
4233     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
4234     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
4235     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4236     if (!redprocs) {
4237       ranks_send_to_idx[0] = oldranks[is_indices[0]];
4238     } else {
4239       PetscInt    idxs[1];
4240       PetscMPIInt tag;
4241       MPI_Request *reqs;
4242 
4243       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
4244       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
4245       for (i=rstart;i<rend;i++) {
4246         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
4247       }
4248       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
4249       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4250       ierr = PetscFree(reqs);CHKERRQ(ierr);
4251       ranks_send_to_idx[0] = oldranks[idxs[0]];
4252     }
4253     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4254     /* clean up */
4255     ierr = PetscFree(oldranks);CHKERRQ(ierr);
4256     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
4257     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
4258     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
4259   }
4260   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4261 
4262   /* assemble parallel IS for sends */
4263   i = 1;
4264   if (color) i=0;
4265   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
4266   /* get back IS */
4267   *is_sends = ranks_send_to;
4268   PetscFunctionReturn(0);
4269 }
4270 
4271 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
4272 
4273 #undef __FUNCT__
4274 #define __FUNCT__ "MatISSubassemble"
4275 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
4276 {
4277   Mat                    local_mat;
4278   IS                     is_sends_internal;
4279   PetscInt               rows,cols,new_local_rows;
4280   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
4281   PetscBool              ismatis,isdense,newisdense,destroy_mat;
4282   ISLocalToGlobalMapping l2gmap;
4283   PetscInt*              l2gmap_indices;
4284   const PetscInt*        is_indices;
4285   MatType                new_local_type;
4286   /* buffers */
4287   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
4288   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
4289   PetscInt               *recv_buffer_idxs_local;
4290   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
4291   /* MPI */
4292   MPI_Comm               comm,comm_n;
4293   PetscSubcomm           subcomm;
4294   PetscMPIInt            n_sends,n_recvs,commsize;
4295   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
4296   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
4297   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
4298   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
4299   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
4300   PetscErrorCode         ierr;
4301 
4302   PetscFunctionBegin;
4303   /* TODO: add missing checks */
4304   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
4305   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
4306   PetscValidLogicalCollectiveEnum(mat,reuse,5);
4307   PetscValidLogicalCollectiveInt(mat,nis,7);
4308   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
4309   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
4310   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4311   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
4312   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
4313   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
4314   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
4315   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
4316     PetscInt mrows,mcols,mnrows,mncols;
4317     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
4318     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
4319     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
4320     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
4321     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
4322     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
4323   }
4324   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
4325   PetscValidLogicalCollectiveInt(mat,bs,0);
4326   /* prepare IS for sending if not provided */
4327   if (!is_sends) {
4328     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
4329     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
4330   } else {
4331     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
4332     is_sends_internal = is_sends;
4333   }
4334 
4335   /* get comm */
4336   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
4337 
4338   /* compute number of sends */
4339   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
4340   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
4341 
4342   /* compute number of receives */
4343   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
4344   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
4345   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
4346   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4347   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
4348   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
4349   ierr = PetscFree(iflags);CHKERRQ(ierr);
4350 
4351   /* restrict comm if requested */
4352   subcomm = 0;
4353   destroy_mat = PETSC_FALSE;
4354   if (restrict_comm) {
4355     PetscMPIInt color,subcommsize;
4356 
4357     color = 0;
4358     if (restrict_full) {
4359       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
4360     } else {
4361       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
4362     }
4363     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
4364     subcommsize = commsize - subcommsize;
4365     /* check if reuse has been requested */
4366     if (reuse == MAT_REUSE_MATRIX) {
4367       if (*mat_n) {
4368         PetscMPIInt subcommsize2;
4369         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
4370         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
4371         comm_n = PetscObjectComm((PetscObject)*mat_n);
4372       } else {
4373         comm_n = PETSC_COMM_SELF;
4374       }
4375     } else { /* MAT_INITIAL_MATRIX */
4376       PetscMPIInt rank;
4377 
4378       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4379       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4380       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4381       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4382       comm_n = PetscSubcommChild(subcomm);
4383     }
4384     /* flag to destroy *mat_n if not significative */
4385     if (color) destroy_mat = PETSC_TRUE;
4386   } else {
4387     comm_n = comm;
4388   }
4389 
4390   /* prepare send/receive buffers */
4391   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4392   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4393   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4394   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4395   if (nis) {
4396     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4397   }
4398 
4399   /* Get data from local matrices */
4400   if (!isdense) {
4401     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4402     /* TODO: See below some guidelines on how to prepare the local buffers */
4403     /*
4404        send_buffer_vals should contain the raw values of the local matrix
4405        send_buffer_idxs should contain:
4406        - MatType_PRIVATE type
4407        - PetscInt        size_of_l2gmap
4408        - PetscInt        global_row_indices[size_of_l2gmap]
4409        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4410     */
4411   } else {
4412     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4413     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4414     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4415     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4416     send_buffer_idxs[1] = i;
4417     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4418     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4419     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4420     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4421     for (i=0;i<n_sends;i++) {
4422       ilengths_vals[is_indices[i]] = len*len;
4423       ilengths_idxs[is_indices[i]] = len+2;
4424     }
4425   }
4426   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4427   /* additional is (if any) */
4428   if (nis) {
4429     PetscMPIInt psum;
4430     PetscInt j;
4431     for (j=0,psum=0;j<nis;j++) {
4432       PetscInt plen;
4433       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4434       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4435       psum += len+1; /* indices + lenght */
4436     }
4437     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4438     for (j=0,psum=0;j<nis;j++) {
4439       PetscInt plen;
4440       const PetscInt *is_array_idxs;
4441       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4442       send_buffer_idxs_is[psum] = plen;
4443       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4444       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4445       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4446       psum += plen+1; /* indices + lenght */
4447     }
4448     for (i=0;i<n_sends;i++) {
4449       ilengths_idxs_is[is_indices[i]] = psum;
4450     }
4451     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4452   }
4453 
4454   buf_size_idxs = 0;
4455   buf_size_vals = 0;
4456   buf_size_idxs_is = 0;
4457   for (i=0;i<n_recvs;i++) {
4458     buf_size_idxs += (PetscInt)olengths_idxs[i];
4459     buf_size_vals += (PetscInt)olengths_vals[i];
4460     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4461   }
4462   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4463   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4464   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4465 
4466   /* get new tags for clean communications */
4467   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4468   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4469   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4470 
4471   /* allocate for requests */
4472   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4473   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4474   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4475   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4476   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4477   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4478 
4479   /* communications */
4480   ptr_idxs = recv_buffer_idxs;
4481   ptr_vals = recv_buffer_vals;
4482   ptr_idxs_is = recv_buffer_idxs_is;
4483   for (i=0;i<n_recvs;i++) {
4484     source_dest = onodes[i];
4485     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4486     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4487     ptr_idxs += olengths_idxs[i];
4488     ptr_vals += olengths_vals[i];
4489     if (nis) {
4490       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4491       ptr_idxs_is += olengths_idxs_is[i];
4492     }
4493   }
4494   for (i=0;i<n_sends;i++) {
4495     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4496     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4497     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4498     if (nis) {
4499       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4500     }
4501   }
4502   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4503   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4504 
4505   /* assemble new l2g map */
4506   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4507   ptr_idxs = recv_buffer_idxs;
4508   new_local_rows = 0;
4509   for (i=0;i<n_recvs;i++) {
4510     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4511     ptr_idxs += olengths_idxs[i];
4512   }
4513   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4514   ptr_idxs = recv_buffer_idxs;
4515   new_local_rows = 0;
4516   for (i=0;i<n_recvs;i++) {
4517     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4518     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4519     ptr_idxs += olengths_idxs[i];
4520   }
4521   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4522   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4523   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4524 
4525   /* infer new local matrix type from received local matrices type */
4526   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4527   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4528   if (n_recvs) {
4529     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4530     ptr_idxs = recv_buffer_idxs;
4531     for (i=0;i<n_recvs;i++) {
4532       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4533         new_local_type_private = MATAIJ_PRIVATE;
4534         break;
4535       }
4536       ptr_idxs += olengths_idxs[i];
4537     }
4538     switch (new_local_type_private) {
4539       case MATDENSE_PRIVATE:
4540         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4541           new_local_type = MATSEQAIJ;
4542           bs = 1;
4543         } else { /* if I receive only 1 dense matrix */
4544           new_local_type = MATSEQDENSE;
4545           bs = 1;
4546         }
4547         break;
4548       case MATAIJ_PRIVATE:
4549         new_local_type = MATSEQAIJ;
4550         bs = 1;
4551         break;
4552       case MATBAIJ_PRIVATE:
4553         new_local_type = MATSEQBAIJ;
4554         break;
4555       case MATSBAIJ_PRIVATE:
4556         new_local_type = MATSEQSBAIJ;
4557         break;
4558       default:
4559         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4560         break;
4561     }
4562   } else { /* by default, new_local_type is seqdense */
4563     new_local_type = MATSEQDENSE;
4564     bs = 1;
4565   }
4566 
4567   /* create MATIS object if needed */
4568   if (reuse == MAT_INITIAL_MATRIX) {
4569     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4570     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4571   } else {
4572     /* it also destroys the local matrices */
4573     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4574   }
4575   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4576   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4577 
4578   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4579 
4580   /* Global to local map of received indices */
4581   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4582   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4583   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4584 
4585   /* restore attributes -> type of incoming data and its size */
4586   buf_size_idxs = 0;
4587   for (i=0;i<n_recvs;i++) {
4588     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4589     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4590     buf_size_idxs += (PetscInt)olengths_idxs[i];
4591   }
4592   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4593 
4594   /* set preallocation */
4595   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4596   if (!newisdense) {
4597     PetscInt *new_local_nnz=0;
4598 
4599     ptr_vals = recv_buffer_vals;
4600     ptr_idxs = recv_buffer_idxs_local;
4601     if (n_recvs) {
4602       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4603     }
4604     for (i=0;i<n_recvs;i++) {
4605       PetscInt j;
4606       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4607         for (j=0;j<*(ptr_idxs+1);j++) {
4608           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4609         }
4610       } else {
4611         /* TODO */
4612       }
4613       ptr_idxs += olengths_idxs[i];
4614     }
4615     if (new_local_nnz) {
4616       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4617       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4618       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4619       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4620       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4621       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4622     } else {
4623       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4624     }
4625     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4626   } else {
4627     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4628   }
4629 
4630   /* set values */
4631   ptr_vals = recv_buffer_vals;
4632   ptr_idxs = recv_buffer_idxs_local;
4633   for (i=0;i<n_recvs;i++) {
4634     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4635       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4636       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4637       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4638       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4639       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4640     } else {
4641       /* TODO */
4642     }
4643     ptr_idxs += olengths_idxs[i];
4644     ptr_vals += olengths_vals[i];
4645   }
4646   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4647   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4648   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4649   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4650   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4651   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4652 
4653 #if 0
4654   if (!restrict_comm) { /* check */
4655     Vec       lvec,rvec;
4656     PetscReal infty_error;
4657 
4658     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4659     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4660     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4661     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4662     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4663     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4664     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4665     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4666     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4667   }
4668 #endif
4669 
4670   /* assemble new additional is (if any) */
4671   if (nis) {
4672     PetscInt **temp_idxs,*count_is,j,psum;
4673 
4674     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4675     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4676     ptr_idxs = recv_buffer_idxs_is;
4677     psum = 0;
4678     for (i=0;i<n_recvs;i++) {
4679       for (j=0;j<nis;j++) {
4680         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4681         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4682         psum += plen;
4683         ptr_idxs += plen+1; /* shift pointer to received data */
4684       }
4685     }
4686     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4687     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4688     for (i=1;i<nis;i++) {
4689       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4690     }
4691     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4692     ptr_idxs = recv_buffer_idxs_is;
4693     for (i=0;i<n_recvs;i++) {
4694       for (j=0;j<nis;j++) {
4695         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4696         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4697         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4698         ptr_idxs += plen+1; /* shift pointer to received data */
4699       }
4700     }
4701     for (i=0;i<nis;i++) {
4702       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4703       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4704       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4705     }
4706     ierr = PetscFree(count_is);CHKERRQ(ierr);
4707     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4708     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4709   }
4710   /* free workspace */
4711   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4712   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4713   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4714   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4715   if (isdense) {
4716     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4717     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4718   } else {
4719     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4720   }
4721   if (nis) {
4722     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4723     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4724   }
4725   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4726   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4727   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4728   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4729   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4730   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4731   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4732   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4733   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4734   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4735   ierr = PetscFree(onodes);CHKERRQ(ierr);
4736   if (nis) {
4737     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4738     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4739     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4740   }
4741   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4742   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4743     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4744     for (i=0;i<nis;i++) {
4745       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4746     }
4747     *mat_n = NULL;
4748   }
4749   PetscFunctionReturn(0);
4750 }
4751 
4752 /* temporary hack into ksp private data structure */
4753 #include <petsc/private/kspimpl.h>
4754 
4755 #undef __FUNCT__
4756 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4757 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4758 {
4759   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4760   PC_IS                  *pcis = (PC_IS*)pc->data;
4761   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4762   MatNullSpace           CoarseNullSpace=NULL;
4763   ISLocalToGlobalMapping coarse_islg;
4764   IS                     coarse_is,*isarray;
4765   PetscInt               i,im_active=-1,active_procs=-1;
4766   PetscInt               nis,nisdofs,nisneu;
4767   PC                     pc_temp;
4768   PCType                 coarse_pc_type;
4769   KSPType                coarse_ksp_type;
4770   PetscBool              multilevel_requested,multilevel_allowed;
4771   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4772   Mat                    t_coarse_mat_is;
4773   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4774   PetscMPIInt            all_procs;
4775   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4776   PetscBool              compute_vecs = PETSC_FALSE;
4777   PetscScalar            *array;
4778   PetscErrorCode         ierr;
4779 
4780   PetscFunctionBegin;
4781   /* Assign global numbering to coarse dofs */
4782   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4783     PetscInt ocoarse_size;
4784     compute_vecs = PETSC_TRUE;
4785     ocoarse_size = pcbddc->coarse_size;
4786     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4787     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4788     /* see if we can avoid some work */
4789     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4790       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4791       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4792         PC        pc;
4793         PetscBool isbddc;
4794 
4795         /* temporary workaround since PCBDDC does not have a reset method so far */
4796         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4797         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4798         if (isbddc) {
4799           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4800         }
4801         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4802         coarse_reuse = PETSC_FALSE;
4803       } else { /* we can safely reuse already computed coarse matrix */
4804         coarse_reuse = PETSC_TRUE;
4805       }
4806     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4807       coarse_reuse = PETSC_FALSE;
4808     }
4809     /* reset any subassembling information */
4810     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4811     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4812   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4813     coarse_reuse = PETSC_TRUE;
4814   }
4815 
4816   /* count "active" (i.e. with positive local size) and "void" processes */
4817   im_active = !!(pcis->n);
4818   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4819   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4820   void_procs = all_procs-active_procs;
4821   csin_type_simple = PETSC_TRUE;
4822   redist = PETSC_FALSE;
4823   if (pcbddc->current_level && void_procs) {
4824     csin_ml = PETSC_TRUE;
4825     ncoarse_ml = void_procs;
4826     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4827     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4828       csin_ds = PETSC_TRUE;
4829       ncoarse_ds = pcbddc->redistribute_coarse;
4830       redist = PETSC_TRUE;
4831     } else {
4832       csin_ds = PETSC_TRUE;
4833       ncoarse_ds = active_procs;
4834       redist = PETSC_TRUE;
4835     }
4836   } else {
4837     csin_ml = PETSC_FALSE;
4838     ncoarse_ml = all_procs;
4839     if (void_procs) {
4840       csin_ds = PETSC_TRUE;
4841       ncoarse_ds = void_procs;
4842       csin_type_simple = PETSC_FALSE;
4843     } else {
4844       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4845         csin_ds = PETSC_TRUE;
4846         ncoarse_ds = pcbddc->redistribute_coarse;
4847         redist = PETSC_TRUE;
4848       } else {
4849         csin_ds = PETSC_FALSE;
4850         ncoarse_ds = all_procs;
4851       }
4852     }
4853   }
4854 
4855   /*
4856     test if we can go multilevel: three conditions must be satisfied:
4857     - we have not exceeded the number of levels requested
4858     - we can actually subassemble the active processes
4859     - we can find a suitable number of MPI processes where we can place the subassembled problem
4860   */
4861   multilevel_allowed = PETSC_FALSE;
4862   multilevel_requested = PETSC_FALSE;
4863   if (pcbddc->current_level < pcbddc->max_levels) {
4864     multilevel_requested = PETSC_TRUE;
4865     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4866       multilevel_allowed = PETSC_FALSE;
4867     } else {
4868       multilevel_allowed = PETSC_TRUE;
4869     }
4870   }
4871   /* determine number of process partecipating to coarse solver */
4872   if (multilevel_allowed) {
4873     ncoarse = ncoarse_ml;
4874     csin = csin_ml;
4875     redist = PETSC_FALSE;
4876   } else {
4877     ncoarse = ncoarse_ds;
4878     csin = csin_ds;
4879   }
4880 
4881   /* creates temporary l2gmap and IS for coarse indexes */
4882   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4883   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4884 
4885   /* creates temporary MATIS object for coarse matrix */
4886   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4887   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4888   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4889   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4890 #if 0
4891   {
4892     PetscViewer viewer;
4893     char filename[256];
4894     sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4895     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4896     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4897     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4898     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4899   }
4900 #endif
4901   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4902   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4903   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4904   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4905   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4906 
4907   /* compute dofs splitting and neumann boundaries for coarse dofs */
4908   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4909     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4910     const PetscInt         *idxs;
4911     ISLocalToGlobalMapping tmap;
4912 
4913     /* create map between primal indices (in local representative ordering) and local primal numbering */
4914     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4915     /* allocate space for temporary storage */
4916     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4917     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4918     /* allocate for IS array */
4919     nisdofs = pcbddc->n_ISForDofsLocal;
4920     nisneu = !!pcbddc->NeumannBoundariesLocal;
4921     nis = nisdofs + nisneu;
4922     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4923     /* dofs splitting */
4924     for (i=0;i<nisdofs;i++) {
4925       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4926       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4927       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4928       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4929       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4930       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4931       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4932       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4933     }
4934     /* neumann boundaries */
4935     if (pcbddc->NeumannBoundariesLocal) {
4936       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4937       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4938       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4939       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4940       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4941       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4942       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4943       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4944     }
4945     /* free memory */
4946     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4947     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4948     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4949   } else {
4950     nis = 0;
4951     nisdofs = 0;
4952     nisneu = 0;
4953     isarray = NULL;
4954   }
4955   /* destroy no longer needed map */
4956   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4957 
4958   /* restrict on coarse candidates (if needed) */
4959   coarse_mat_is = NULL;
4960   if (csin) {
4961     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4962       if (redist) {
4963         PetscMPIInt rank;
4964         PetscInt    spc,n_spc_p1,dest[1],destsize;
4965 
4966         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4967         spc = active_procs/ncoarse;
4968         n_spc_p1 = active_procs%ncoarse;
4969         if (im_active) {
4970           destsize = 1;
4971           if (rank > n_spc_p1*(spc+1)-1) {
4972             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4973           } else {
4974             dest[0] = rank/(spc+1);
4975           }
4976         } else {
4977           destsize = 0;
4978         }
4979         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4980       } else if (csin_type_simple) {
4981         PetscMPIInt rank;
4982         PetscInt    issize,isidx;
4983 
4984         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4985         if (im_active) {
4986           issize = 1;
4987           isidx = (PetscInt)rank;
4988         } else {
4989           issize = 0;
4990           isidx = -1;
4991         }
4992         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4993       } else { /* get a suitable subassembling pattern from MATIS code */
4994         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4995       }
4996 
4997       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4998       if (!redist || ncoarse <= void_procs) {
4999         PetscInt ncoarse_cand,tissize,*nisindices;
5000         PetscInt *coarse_candidates;
5001         const PetscInt* tisindices;
5002 
5003         /* get coarse candidates' ranks in pc communicator */
5004         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
5005         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5006         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
5007           if (!coarse_candidates[i]) {
5008             coarse_candidates[ncoarse_cand++]=i;
5009           }
5010         }
5011         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
5012 
5013 
5014         if (pcbddc->dbg_flag) {
5015           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5016           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
5017           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5018           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
5019           for (i=0;i<ncoarse_cand;i++) {
5020             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
5021           }
5022           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
5023           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5024         }
5025         /* shift the pattern on coarse candidates */
5026         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
5027         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5028         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
5029         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
5030         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5031         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
5032         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
5033       }
5034       if (pcbddc->dbg_flag) {
5035         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5036         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
5037         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5038         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5039       }
5040     }
5041     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
5042     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
5043       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5044     } else { /* this is the last level, so use just receiving processes in subcomm */
5045       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5046     }
5047   } else {
5048     if (pcbddc->dbg_flag) {
5049       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5050       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
5051       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5052     }
5053     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
5054     coarse_mat_is = t_coarse_mat_is;
5055   }
5056 
5057   /* create local to global scatters for coarse problem */
5058   if (compute_vecs) {
5059     PetscInt lrows;
5060     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
5061     if (coarse_mat_is) {
5062       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
5063     } else {
5064       lrows = 0;
5065     }
5066     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
5067     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
5068     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
5069     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5070     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5071   }
5072   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
5073   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
5074 
5075   /* set defaults for coarse KSP and PC */
5076   if (multilevel_allowed) {
5077     coarse_ksp_type = KSPRICHARDSON;
5078     coarse_pc_type = PCBDDC;
5079   } else {
5080     coarse_ksp_type = KSPPREONLY;
5081     coarse_pc_type = PCREDUNDANT;
5082   }
5083 
5084   /* print some info if requested */
5085   if (pcbddc->dbg_flag) {
5086     if (!multilevel_allowed) {
5087       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5088       if (multilevel_requested) {
5089         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5090       } else if (pcbddc->max_levels) {
5091         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
5092       }
5093       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5094     }
5095   }
5096 
5097   /* create the coarse KSP object only once with defaults */
5098   if (coarse_mat_is) {
5099     MatReuse coarse_mat_reuse;
5100     PetscViewer dbg_viewer = NULL;
5101     if (pcbddc->dbg_flag) {
5102       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
5103       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5104     }
5105     if (!pcbddc->coarse_ksp) {
5106       char prefix[256],str_level[16];
5107       size_t len;
5108       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
5109       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5110       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
5111       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
5112       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
5113       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
5114       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
5115       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5116       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5117       /* prefix */
5118       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
5119       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
5120       if (!pcbddc->current_level) {
5121         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
5122         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
5123       } else {
5124         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
5125         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5126         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5127         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
5128         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
5129         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
5130       }
5131       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
5132       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5133       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
5134       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5135       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
5136       /* allow user customization */
5137       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
5138     }
5139     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5140     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5141     if (nisdofs) {
5142       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
5143       for (i=0;i<nisdofs;i++) {
5144         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
5145       }
5146     }
5147     if (nisneu) {
5148       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
5149       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
5150     }
5151 
5152     /* get some info after set from options */
5153     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
5154     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
5155     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
5156     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
5157       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5158       isbddc = PETSC_FALSE;
5159     }
5160     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
5161     if (isredundant) {
5162       KSP inner_ksp;
5163       PC  inner_pc;
5164       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
5165       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
5166       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
5167     }
5168 
5169     /* assemble coarse matrix */
5170     if (coarse_reuse) {
5171       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
5172       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
5173       coarse_mat_reuse = MAT_REUSE_MATRIX;
5174     } else {
5175       coarse_mat_reuse = MAT_INITIAL_MATRIX;
5176     }
5177     if (isbddc || isnn) {
5178       if (pcbddc->coarsening_ratio > 1) {
5179         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
5180           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
5181           if (pcbddc->dbg_flag) {
5182             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5183             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
5184             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
5185             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5186           }
5187         }
5188         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
5189       } else {
5190         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
5191         coarse_mat = coarse_mat_is;
5192       }
5193     } else {
5194       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
5195     }
5196     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
5197 
5198     /* propagate symmetry info of coarse matrix */
5199     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
5200     if (pc->pmat->symmetric_set) {
5201       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
5202     }
5203     if (pc->pmat->hermitian_set) {
5204       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
5205     }
5206     if (pc->pmat->spd_set) {
5207       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
5208     }
5209     /* set operators */
5210     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5211     if (pcbddc->dbg_flag) {
5212       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5213     }
5214   } else { /* processes non partecipating to coarse solver (if any) */
5215     coarse_mat = 0;
5216   }
5217   ierr = PetscFree(isarray);CHKERRQ(ierr);
5218 #if 0
5219   {
5220     PetscViewer viewer;
5221     char filename[256];
5222     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
5223     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
5224     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
5225     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
5226     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
5227   }
5228 #endif
5229 
5230   /* Compute coarse null space (special handling by BDDC only) */
5231 #if 0
5232   if (pcbddc->NullSpace) {
5233     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
5234   }
5235 #endif
5236   /* hack */
5237   if (pcbddc->coarse_ksp) {
5238     Vec crhs,csol;
5239 
5240     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
5241     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
5242     if (!csol) {
5243       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
5244     }
5245     if (!crhs) {
5246       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
5247     }
5248   }
5249 
5250   /* compute null space for coarse solver if the benign trick has been requested */
5251   if (pcbddc->benign_null) {
5252 
5253     ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr);
5254     for (i=0;i<pcbddc->benign_n;i++) {
5255       ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr);
5256     }
5257     ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
5258     ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
5259     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5260     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5261     if (coarse_mat) {
5262       Vec         nullv;
5263       PetscScalar *array,*array2;
5264       PetscInt    nl;
5265 
5266       ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr);
5267       ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr);
5268       ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5269       ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr);
5270       ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr);
5271       ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr);
5272       ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5273       ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr);
5274       ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr);
5275       ierr = VecDestroy(&nullv);CHKERRQ(ierr);
5276     }
5277   }
5278 
5279   if (pcbddc->coarse_ksp) {
5280     PetscBool ispreonly;
5281 
5282     if (CoarseNullSpace) {
5283       PetscBool isnull;
5284       ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr);
5285       if (isnull) {
5286         if (isbddc) {
5287           ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
5288         } else {
5289           ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
5290         }
5291       } else {
5292         ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5293       }
5294     }
5295     /* setup coarse ksp */
5296     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
5297     /* Check coarse problem if in debug mode or if solving with an iterative method */
5298     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
5299     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
5300       KSP       check_ksp;
5301       KSPType   check_ksp_type;
5302       PC        check_pc;
5303       Vec       check_vec,coarse_vec;
5304       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
5305       PetscInt  its;
5306       PetscBool compute_eigs;
5307       PetscReal *eigs_r,*eigs_c;
5308       PetscInt  neigs;
5309       const char *prefix;
5310 
5311       /* Create ksp object suitable for estimation of extreme eigenvalues */
5312       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
5313       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5314       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5315       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
5316       if (ispreonly) {
5317         check_ksp_type = KSPPREONLY;
5318         compute_eigs = PETSC_FALSE;
5319       } else {
5320         check_ksp_type = KSPGMRES;
5321         compute_eigs = PETSC_TRUE;
5322       }
5323       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
5324       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
5325       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
5326       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
5327       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
5328       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
5329       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
5330       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
5331       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
5332       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
5333       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
5334       /* create random vec */
5335       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
5336       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
5337       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
5338       if (CoarseNullSpace) {
5339         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
5340       }
5341       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5342       /* solve coarse problem */
5343       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
5344       if (CoarseNullSpace) {
5345         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
5346       }
5347       /* set eigenvalue estimation if preonly has not been requested */
5348       if (compute_eigs) {
5349         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
5350         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
5351         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
5352         lambda_max = eigs_r[neigs-1];
5353         lambda_min = eigs_r[0];
5354         if (pcbddc->use_coarse_estimates) {
5355           if (lambda_max>lambda_min) {
5356             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
5357             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
5358           }
5359         }
5360       }
5361 
5362       /* check coarse problem residual error */
5363       if (pcbddc->dbg_flag) {
5364         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
5365         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5366         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
5367         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
5368         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5369         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
5370         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
5371         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
5372         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
5373         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
5374         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
5375         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
5376         if (CoarseNullSpace) {
5377           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr);
5378         }
5379         if (compute_eigs) {
5380           PetscReal lambda_max_s,lambda_min_s;
5381           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
5382           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
5383           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
5384           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
5385           for (i=0;i<neigs;i++) {
5386             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
5387           }
5388         }
5389         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5390         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5391       }
5392       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
5393       if (compute_eigs) {
5394         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
5395         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
5396       }
5397     }
5398   }
5399   /* print additional info */
5400   if (pcbddc->dbg_flag) {
5401     /* waits until all processes reaches this point */
5402     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
5403     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
5404     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5405   }
5406 
5407   /* free memory */
5408   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5409   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
5410   PetscFunctionReturn(0);
5411 }
5412 
5413 #undef __FUNCT__
5414 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
5415 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
5416 {
5417   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5418   PC_IS*         pcis = (PC_IS*)pc->data;
5419   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5420   IS             subset,subset_mult,subset_n;
5421   PetscInt       local_size,coarse_size=0;
5422   PetscInt       *local_primal_indices=NULL;
5423   const PetscInt *t_local_primal_indices;
5424   PetscErrorCode ierr;
5425 
5426   PetscFunctionBegin;
5427   /* Compute global number of coarse dofs */
5428   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5429     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5430   }
5431   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5432   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5433   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5434   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5435   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5436   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5437   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5438   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5439   if (local_size != pcbddc->local_primal_size) {
5440     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5441   }
5442   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5443   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5444   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5445   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5446   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5447 
5448   /* check numbering */
5449   if (pcbddc->dbg_flag) {
5450     PetscScalar coarsesum,*array,*array2;
5451     PetscInt    i;
5452     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5453 
5454     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5455     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5456     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5457     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
5458     /* counter */
5459     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5460     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5461     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5462     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5463     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5464     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5465 
5466     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5467     for (i=0;i<pcbddc->local_primal_size;i++) {
5468       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5469     }
5470     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5471     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5472     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5473     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5474     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5475     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5476     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5477     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5478     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5479     for (i=0;i<pcis->n;i++) {
5480       if (array[i] != 0.0 && array[i] != array2[i]) {
5481         PetscInt owned = (PetscInt)PetscRealPart(array[i]);
5482         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
5483         set_error = PETSC_TRUE;
5484         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5485       }
5486     }
5487     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5488     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5489     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5490     for (i=0;i<pcis->n;i++) {
5491       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5492     }
5493     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5494     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5495     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5496     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5497     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5498     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5499     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5500       PetscInt *gidxs;
5501 
5502       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5503       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5504       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5505       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5506       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5507       for (i=0;i<pcbddc->local_primal_size;i++) {
5508         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5509       }
5510       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5511       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5512     }
5513     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5514     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5515   }
5516   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5517   /* get back data */
5518   *coarse_size_n = coarse_size;
5519   *local_primal_indices_n = local_primal_indices;
5520   PetscFunctionReturn(0);
5521 }
5522 
5523 #undef __FUNCT__
5524 #define __FUNCT__ "PCBDDCGlobalToLocal"
5525 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5526 {
5527   IS             localis_t;
5528   PetscInt       i,lsize,*idxs,n;
5529   PetscScalar    *vals;
5530   PetscErrorCode ierr;
5531 
5532   PetscFunctionBegin;
5533   /* get indices in local ordering exploiting local to global map */
5534   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5535   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5536   for (i=0;i<lsize;i++) vals[i] = 1.0;
5537   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5538   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5539   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5540   if (idxs) { /* multilevel guard */
5541     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5542   }
5543   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5544   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5545   ierr = PetscFree(vals);CHKERRQ(ierr);
5546   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5547   /* now compute set in local ordering */
5548   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5549   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5550   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5551   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5552   for (i=0,lsize=0;i<n;i++) {
5553     if (PetscRealPart(vals[i]) > 0.5) {
5554       lsize++;
5555     }
5556   }
5557   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5558   for (i=0,lsize=0;i<n;i++) {
5559     if (PetscRealPart(vals[i]) > 0.5) {
5560       idxs[lsize++] = i;
5561     }
5562   }
5563   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5564   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5565   *localis = localis_t;
5566   PetscFunctionReturn(0);
5567 }
5568 
5569 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5570 #undef __FUNCT__
5571 #define __FUNCT__ "PCBDDCMatMult_Private"
5572 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5573 {
5574   PCBDDCChange_ctx change_ctx;
5575   PetscErrorCode   ierr;
5576 
5577   PetscFunctionBegin;
5578   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5579   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5580   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5581   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5582   PetscFunctionReturn(0);
5583 }
5584 
5585 #undef __FUNCT__
5586 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5587 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5588 {
5589   PCBDDCChange_ctx change_ctx;
5590   PetscErrorCode   ierr;
5591 
5592   PetscFunctionBegin;
5593   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5594   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5595   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5596   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5597   PetscFunctionReturn(0);
5598 }
5599 
5600 #undef __FUNCT__
5601 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5602 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5603 {
5604   PC_IS               *pcis=(PC_IS*)pc->data;
5605   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5606   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5607   Mat                 S_j;
5608   PetscInt            *used_xadj,*used_adjncy;
5609   PetscBool           free_used_adj;
5610   PetscErrorCode      ierr;
5611 
5612   PetscFunctionBegin;
5613   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5614   free_used_adj = PETSC_FALSE;
5615   if (pcbddc->sub_schurs_layers == -1) {
5616     used_xadj = NULL;
5617     used_adjncy = NULL;
5618   } else {
5619     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5620       used_xadj = pcbddc->mat_graph->xadj;
5621       used_adjncy = pcbddc->mat_graph->adjncy;
5622     } else if (pcbddc->computed_rowadj) {
5623       used_xadj = pcbddc->mat_graph->xadj;
5624       used_adjncy = pcbddc->mat_graph->adjncy;
5625     } else {
5626       PetscBool      flg_row=PETSC_FALSE;
5627       const PetscInt *xadj,*adjncy;
5628       PetscInt       nvtxs;
5629 
5630       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5631       if (flg_row) {
5632         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5633         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5634         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5635         free_used_adj = PETSC_TRUE;
5636       } else {
5637         pcbddc->sub_schurs_layers = -1;
5638         used_xadj = NULL;
5639         used_adjncy = NULL;
5640       }
5641       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5642     }
5643   }
5644 
5645   /* setup sub_schurs data */
5646   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5647   if (!sub_schurs->use_mumps) {
5648     /* pcbddc->ksp_D up to date only if not using MUMPS */
5649     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5650     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5651   } else {
5652     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5653     PetscBool isseqaij;
5654     if (!pcbddc->use_vertices && reuse_solvers) {
5655       PetscInt n_vertices;
5656 
5657       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5658       reuse_solvers = (PetscBool)!n_vertices;
5659     }
5660     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5661     if (!isseqaij) {
5662       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5663       if (matis->A == pcbddc->local_mat) {
5664         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5665         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5666       } else {
5667         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5668       }
5669     }
5670     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5671   }
5672   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5673 
5674   /* free adjacency */
5675   if (free_used_adj) {
5676     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5677   }
5678   PetscFunctionReturn(0);
5679 }
5680 
5681 #undef __FUNCT__
5682 #define __FUNCT__ "PCBDDCInitSubSchurs"
5683 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5684 {
5685   PC_IS               *pcis=(PC_IS*)pc->data;
5686   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5687   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5688   PCBDDCGraph         graph;
5689   PetscErrorCode      ierr;
5690 
5691   PetscFunctionBegin;
5692   /* attach interface graph for determining subsets */
5693   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5694     IS       verticesIS,verticescomm;
5695     PetscInt vsize,*idxs;
5696 
5697     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5698     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5699     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5700     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5701     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5702     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5703     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5704     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5705     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5706     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5707     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5708 /*
5709     if (pcbddc->dbg_flag) {
5710       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5711     }
5712 */
5713   } else {
5714     graph = pcbddc->mat_graph;
5715   }
5716 
5717   /* sub_schurs init */
5718   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5719 
5720   /* free graph struct */
5721   if (pcbddc->sub_schurs_rebuild) {
5722     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5723   }
5724   PetscFunctionReturn(0);
5725 }
5726 
5727 #undef __FUNCT__
5728 #define __FUNCT__ "PCBDDCCheckOperator"
5729 PetscErrorCode PCBDDCCheckOperator(PC pc)
5730 {
5731   PC_IS               *pcis=(PC_IS*)pc->data;
5732   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5733   PetscErrorCode      ierr;
5734 
5735   PetscFunctionBegin;
5736   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5737     IS             zerodiag = NULL;
5738     Mat            S_j,B0_B=NULL;
5739     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5740     PetscScalar    *p0_check,*array,*array2;
5741     PetscReal      norm;
5742     PetscInt       i;
5743 
5744     /* B0 and B0_B */
5745     if (zerodiag) {
5746       IS       dummy;
5747 
5748       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
5749       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5750       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5751       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5752     }
5753     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5754     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5755     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5756     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5757     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5758     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5759     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5760     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5761     /* S_j */
5762     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5763     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5764 
5765     /* mimic vector in \widetilde{W}_\Gamma */
5766     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5767     /* continuous in primal space */
5768     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5769     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5770     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5771     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5772     ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr);
5773     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
5774     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5775     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5776     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5777     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5778     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5779     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5780     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5781     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5782 
5783     /* assemble rhs for coarse problem */
5784     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5785     /* local with Schur */
5786     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5787     if (zerodiag) {
5788       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5789       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
5790       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5791       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5792     }
5793     /* sum on primal nodes the local contributions */
5794     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5795     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5796     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5797     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5798     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5799     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5800     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5801     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5802     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5803     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5804     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5805     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5806     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5807     /* scale primal nodes (BDDC sums contibutions) */
5808     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5809     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5810     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5811     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5812     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5813     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5814     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5815     /* global: \widetilde{B0}_B w_\Gamma */
5816     if (zerodiag) {
5817       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5818       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5819       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
5820       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5821     }
5822     /* BDDC */
5823     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5824     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5825 
5826     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5827     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5828     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5829     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5830     for (i=0;i<pcbddc->benign_n;i++) {
5831       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
5832     }
5833     ierr = PetscFree(p0_check);CHKERRQ(ierr);
5834     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5835     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5836     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5837     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5838     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5839   }
5840   PetscFunctionReturn(0);
5841 }
5842