xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision ffd830a353d0919fb77324c0e33a98985abf7504)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/ksp/pc/impls/bddc/bddc.h>
3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
8 
9 /* TODO: add reuse flag */
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqAIJCompress"
12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
13 {
14   Mat            Bt;
15   PetscScalar    *a,*bdata;
16   const PetscInt *ii,*ij;
17   PetscInt       m,n,i,nnz,*bii,*bij;
18   PetscBool      flg_row;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr);
23   ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
24   ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr);
25   nnz = n;
26   for (i=0;i<ii[n];i++) {
27     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
28   }
29   ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr);
30   ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr);
31   ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr);
32   nnz = 0;
33   bii[0] = 0;
34   for (i=0;i<n;i++) {
35     PetscInt j;
36     for (j=ii[i];j<ii[i+1];j++) {
37       PetscScalar entry = a[j];
38       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) {
39         bij[nnz] = ij[j];
40         bdata[nnz] = entry;
41         nnz++;
42       }
43     }
44     bii[i+1] = nnz;
45   }
46   ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr);
47   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr);
48   ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
49   {
50     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
51     b->free_a = PETSC_TRUE;
52     b->free_ij = PETSC_TRUE;
53   }
54   *B = Bt;
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDetectDisconnectedComponents"
60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[])
61 {
62   Mat                    B;
63   IS                     is_dummy,*cc_n;
64   ISLocalToGlobalMapping l2gmap_dummy;
65   PCBDDCGraph            graph;
66   PetscInt               i,n;
67   PetscInt               *xadj,*adjncy;
68   PetscInt               *xadj_filtered,*adjncy_filtered;
69   PetscBool              flg_row,isseqaij;
70   PetscErrorCode         ierr;
71 
72   PetscFunctionBegin;
73   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
74   if (!isseqaij && filter) {
75     PetscBool isseqdense;
76 
77     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr);
78     if (!isseqdense) {
79       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
80     } else { /* TODO: rectangular case and LDA */
81       PetscScalar *array;
82       PetscReal   chop=1.e-6;
83 
84       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
85       ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr);
86       ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr);
87       for (i=0;i<n;i++) {
88         PetscInt j;
89         for (j=i+1;j<n;j++) {
90           PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
91           if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
92           if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
93         }
94       }
95       ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr);
96       ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
97     }
98   } else {
99     B = A;
100   }
101   ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
102 
103   /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
104   if (filter) {
105     PetscScalar *data;
106     PetscInt    j,cum;
107 
108     ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr);
109     ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr);
110     cum = 0;
111     for (i=0;i<n;i++) {
112       PetscInt t;
113 
114       for (j=xadj[i];j<xadj[i+1];j++) {
115         if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
116           continue;
117         }
118         adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
119       }
120       t = xadj_filtered[i];
121       xadj_filtered[i] = cum;
122       cum += t;
123     }
124     ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr);
125   } else {
126     xadj_filtered = NULL;
127     adjncy_filtered = NULL;
128   }
129 
130   /* compute local connected components using PCBDDCGraph */
131   ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr);
132   ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
133   ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
134   ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
135   ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr);
136   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
137   if (xadj_filtered) {
138     graph->xadj = xadj_filtered;
139     graph->adjncy = adjncy_filtered;
140   } else {
141     graph->xadj = xadj;
142     graph->adjncy = adjncy;
143   }
144   ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
145   ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
146 
147   /* partial clean up */
148   ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr);
149   ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
150   if (A != B) {
151     ierr = MatDestroy(&B);CHKERRQ(ierr);
152   }
153 
154   /* get back data */
155   if (ncc) *ncc = graph->ncc;
156   if (cc) {
157     ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr);
158     for (i=0;i<graph->ncc;i++) {
159       ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr);
160     }
161     *cc = cc_n;
162   }
163   /* clean up graph */
164   graph->xadj = 0;
165   graph->adjncy = 0;
166   ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "PCBDDCBenignCheck"
172 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
173 {
174   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
175   PC_IS*         pcis = (PC_IS*)(pc->data);
176   IS             dirIS = NULL;
177   PetscInt       i;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
182   if (zerodiag) {
183     Mat            A;
184     Vec            vec3_N;
185     PetscScalar    *vals;
186     const PetscInt *idxs;
187     PetscInt       nz;
188 
189     /* p0 */
190     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
191     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
192     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
193     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
194     for (i=0;i<nz;i++) vals[i] = 1.;
195     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
196     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
197     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
198     /* v_I */
199     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
200     for (i=0;i<nz;i++) vals[i] = 0.;
201     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
202     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
203     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
204     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
205     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
206     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
207     if (dirIS) {
208       PetscInt n;
209 
210       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
211       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
212       for (i=0;i<n;i++) vals[i] = 0.;
213       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
214       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
215     }
216     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
217     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
218     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
219     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
220     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
221     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
222     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
223     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) {
224       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
225     }
226     ierr = PetscFree(vals);CHKERRQ(ierr);
227     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
228   }
229   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
230 
231   /* check PCBDDCBenignGetOrSetP0 */
232   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
233   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
234   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
235   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
236   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
237   for (i=0;i<pcbddc->benign_n;i++) {
238     if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) {
239       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr);
240     }
241   }
242   PetscFunctionReturn(0);
243 }
244 
245 #undef __FUNCT__
246 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
247 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
248 {
249   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
250   IS             pressures,zerodiag,*zerodiag_subs;
251   PetscInt       nz,n;
252   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag;
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
257   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
258   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
259   ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr);
260   pcbddc->benign_original_mat = pcbddc->local_mat;
261   /* if a local info on dofs is present, assumes the last field is represented by "pressures"
262      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
263      Checks if all the pressure dofs in each subdomain have a zero diagonal
264      If not, a change of basis on pressures is not needed
265      since the local Schur complements are SPD
266   */
267   has_null_pressures = PETSC_TRUE;
268   have_null = PETSC_TRUE;
269   if (pcbddc->n_ISForDofsLocal) {
270     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
271 
272     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
273     ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr);
274     ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
275     ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr);
276     ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
277     ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr);
278     if (!sorted) {
279       ierr = ISSort(pressures);CHKERRQ(ierr);
280     }
281   } else {
282     pressures = NULL;
283   }
284   ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr);
285   /* TODO: add check for shared dofs and raise error */
286   ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr);
287   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
288   if (!sorted) {
289     ierr = ISSort(zerodiag);CHKERRQ(ierr);
290   }
291   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
292   if (!nz) {
293     if (n) have_null = PETSC_FALSE;
294     has_null_pressures = PETSC_FALSE;
295     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
296   }
297   recompute_zerodiag = PETSC_FALSE;
298   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
299   zerodiag_subs = NULL;
300   pcbddc->benign_n = 0;
301   if (has_null_pressures) {
302     IS       *subs;
303     PetscInt nsubs,i;
304 
305     subs = pcbddc->local_subs;
306     nsubs = pcbddc->n_local_subs;
307     if (nsubs > 1) {
308       ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr);
309       for (i=0;i<nsubs;i++) {
310         ISLocalToGlobalMapping l2g;
311         IS                     t_zerodiag_subs;
312         PetscInt               nl;
313 
314         ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr);
315         ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr);
316         ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr);
317         if (nl) {
318           PetscBool valid = PETSC_TRUE;
319 
320           if (pressures) {
321             IS t_pressure_subs;
322             ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr);
323             ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr);
324             ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr);
325           }
326           if (valid) {
327             ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr);
328             pcbddc->benign_n++;
329           } else {
330             recompute_zerodiag = PETSC_TRUE;
331           }
332         }
333         ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr);
334         ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr);
335       }
336     } else { /* there's just one subdomain (or zero if they have not been detected */
337       PetscBool valid = PETSC_TRUE;
338       if (pressures) {
339         ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr);
340       }
341       if (valid) {
342         pcbddc->benign_n = 1;
343         ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr);
344         ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr);
345         zerodiag_subs[0] = zerodiag;
346       }
347     }
348   }
349 
350   if (!pcbddc->benign_n) {
351     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
352     recompute_zerodiag = PETSC_FALSE;
353     has_null_pressures = PETSC_FALSE;
354     have_null = PETSC_FALSE;
355   }
356 
357   /* final check for null pressures */
358   if (zerodiag && pressures) {
359     PetscInt nz,np;
360     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
361     ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr);
362     if (nz != np) have_null = PETSC_FALSE;
363   }
364 
365   if (recompute_zerodiag) {
366     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
367     if (pcbddc->benign_n == 1) {
368       ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr);
369       zerodiag = zerodiag_subs[0];
370     } else {
371       PetscInt i,nzn,*new_idxs;
372 
373       nzn = 0;
374       for (i=0;i<pcbddc->benign_n;i++) {
375         PetscInt ns;
376         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
377         nzn += ns;
378       }
379       ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr);
380       nzn = 0;
381       for (i=0;i<pcbddc->benign_n;i++) {
382         PetscInt ns,*idxs;
383         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
384         ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
385         ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr);
386         ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
387         nzn += ns;
388       }
389       ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr);
390       ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr);
391     }
392     have_null = PETSC_FALSE;
393   }
394 
395   if (has_null_pressures) {
396     IS             zerodiagc;
397     const PetscInt *idxs,*idxsc;
398     PetscInt       i,s,*nnz;
399     Mat            M;
400 
401     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
402     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
403     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
404     /* local change of basis for pressures */
405     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
406     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr);
407     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
408     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
409     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
410     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
411     for (i=0;i<pcbddc->benign_n;i++) {
412       PetscInt nzs,j;
413 
414       ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr);
415       ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
416       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
417       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
418       ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
419     }
420     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
421     ierr = PetscFree(nnz);CHKERRQ(ierr);
422     /* set identity on velocities */
423     for (i=0;i<n-nz;i++) {
424       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
425     }
426     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
427     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
428     ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
429     ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr);
430     /* set change on pressures */
431     for (s=0;s<pcbddc->benign_n;s++) {
432       PetscScalar *array;
433       PetscInt    nzs;
434 
435       ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr);
436       ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
437       for (i=0;i<nzs-1;i++) {
438         PetscScalar vals[2];
439         PetscInt    cols[2];
440 
441         cols[0] = idxs[i];
442         cols[1] = idxs[nzs-1];
443         vals[0] = 1.;
444         vals[1] = 1.;
445         ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
446       }
447       ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr);
448       for (i=0;i<nzs-1;i++) array[i] = -1.;
449       array[nzs-1] = 1.;
450       ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
451       /* store local idxs for p0 */
452       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
453       ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
454       ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr);
455       ierr = PetscFree(array);CHKERRQ(ierr);
456     }
457     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459     /* TODO: need optimization? */
460     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
461     ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
462     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
463     ierr = MatDestroy(&M);CHKERRQ(ierr);
464     /* store global idxs for p0 */
465     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
466   }
467   ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr);
468   ierr = ISDestroy(&pressures);CHKERRQ(ierr);
469 
470   /* determines if the coarse solver will be singular or not */
471   ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
472   *zerodiaglocal = zerodiag;
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
478 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
479 {
480   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
481   PetscErrorCode ierr;
482 
483   PetscFunctionBegin;
484   if (!pcbddc->benign_sf) {
485     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
486     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
487   }
488   if (get) { /* use SF to get values */
489     PetscScalar *array;
490 
491     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
492     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
493     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
494     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
495   } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */
496     ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
497     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
498     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
499   }
500   PetscFunctionReturn(0);
501 }
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
505 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
506 {
507   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
508   PetscErrorCode ierr;
509 
510   PetscFunctionBegin;
511   /* TODO: add error checking
512     - avoid nested pop (or push) calls.
513     - cannot push before pop.
514     - cannot call this if pcbddc->local_mat is NULL
515   */
516   if (!pcbddc->benign_n) {
517     PetscFunctionReturn(0);
518   }
519   if (pop) {
520     IS       is_p0;
521     MatReuse reuse;
522 
523     /* extract B_0 */
524     reuse = MAT_INITIAL_MATRIX;
525     if (pcbddc->benign_B0) {
526       reuse = MAT_REUSE_MATRIX;
527     }
528     ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr);
529     ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr);
530     /* remove rows and cols from local problem */
531     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
532     ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr);
533     ierr = ISDestroy(&is_p0);CHKERRQ(ierr);
534   } else { /* push */
535     PetscInt i;
536 
537     for (i=0;i<pcbddc->benign_n;i++) {
538       PetscScalar *B0_vals;
539       PetscInt    *B0_cols,B0_ncol;
540 
541       ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
542       ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
543       ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
544       ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr);
545       ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
546     }
547     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
548     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549   }
550   PetscFunctionReturn(0);
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "PCBDDCAdaptiveSelection"
555 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
556 {
557   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
558   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
559   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
560   PetscBLASInt    *B_iwork,*B_ifail;
561   PetscScalar     *work,lwork;
562   PetscScalar     *St,*S,*eigv;
563   PetscScalar     *Sarray,*Starray;
564   PetscReal       *eigs,thresh;
565   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
566   PetscBool       allocated_S_St;
567 #if defined(PETSC_USE_COMPLEX)
568   PetscReal       *rwork;
569 #endif
570   PetscErrorCode  ierr;
571 
572   PetscFunctionBegin;
573   if (!sub_schurs->use_mumps) {
574     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
575   }
576 
577   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
578     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
579   }
580 
581   if (pcbddc->dbg_flag) {
582     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
583     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
584     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
585     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
586   }
587 
588   if (pcbddc->dbg_flag) {
589     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
590   }
591 
592   /* max size of subsets */
593   mss = 0;
594   for (i=0;i<sub_schurs->n_subs;i++) {
595     PetscInt subset_size;
596 
597     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
598     mss = PetscMax(mss,subset_size);
599   }
600 
601   /* min/max and threshold */
602   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
603   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
604   nmax = PetscMax(nmin,nmax);
605   allocated_S_St = PETSC_FALSE;
606   if (nmin) {
607     allocated_S_St = PETSC_TRUE;
608   }
609 
610   /* allocate lapack workspace */
611   cum = cum2 = 0;
612   maxneigs = 0;
613   for (i=0;i<sub_schurs->n_subs;i++) {
614     PetscInt n,subset_size;
615 
616     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
617     n = PetscMin(subset_size,nmax);
618     cum += subset_size;
619     cum2 += subset_size*n;
620     maxneigs = PetscMax(maxneigs,n);
621   }
622   if (mss) {
623     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
624       PetscBLASInt B_itype = 1;
625       PetscBLASInt B_N = mss;
626       PetscReal    zero = 0.0;
627       PetscReal    eps = 0.0; /* dlamch? */
628 
629       B_lwork = -1;
630       S = NULL;
631       St = NULL;
632       eigs = NULL;
633       eigv = NULL;
634       B_iwork = NULL;
635       B_ifail = NULL;
636 #if defined(PETSC_USE_COMPLEX)
637       rwork = NULL;
638 #endif
639       thresh = 1.0;
640       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
641 #if defined(PETSC_USE_COMPLEX)
642       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
643 #else
644       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
645 #endif
646       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
647       ierr = PetscFPTrapPop();CHKERRQ(ierr);
648     } else {
649         /* TODO */
650     }
651   } else {
652     lwork = 0;
653   }
654 
655   nv = 0;
656   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
657     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
658   }
659   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
660   if (allocated_S_St) {
661     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
662   }
663   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
664 #if defined(PETSC_USE_COMPLEX)
665   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
666 #endif
667   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
668                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
669                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
670                       nv+cum,&pcbddc->adaptive_constraints_idxs,
671                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
672   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
673 
674   maxneigs = 0;
675   cum = cum2 = cumarray = 0;
676   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
677   pcbddc->adaptive_constraints_data_ptr[0] = 0;
678   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
679     const PetscInt *idxs;
680 
681     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
682     for (cum=0;cum<nv;cum++) {
683       pcbddc->adaptive_constraints_n[cum] = 1;
684       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
685       pcbddc->adaptive_constraints_data[cum] = 1.0;
686       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
687       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
688     }
689     cum2 = cum;
690     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
691   }
692 
693   if (mss) { /* multilevel */
694     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
695     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
696   }
697 
698   thresh = pcbddc->adaptive_threshold;
699   for (i=0;i<sub_schurs->n_subs;i++) {
700     const PetscInt *idxs;
701     PetscReal      infty = PETSC_MAX_REAL;
702     PetscInt       j,subset_size,eigs_start = 0;
703     PetscBLASInt   B_N;
704     PetscBool      same_data = PETSC_FALSE;
705 
706     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
707     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
708     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
709     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
710       if (sub_schurs->is_hermitian) {
711         PetscInt j,k;
712         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
713           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
714           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
715         }
716         for (j=0;j<subset_size;j++) {
717           for (k=j;k<subset_size;k++) {
718             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
719             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
720           }
721         }
722       } else {
723         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
724         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
725       }
726     } else {
727       S = Sarray + cumarray;
728       St = Starray + cumarray;
729     }
730 
731     /* see if we can save some work */
732     if (sub_schurs->n_subs == 1) {
733       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
734     }
735 
736     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
737       B_neigs = 0;
738     } else {
739       if (nmin) {
740         Mat SM,StM;
741         PetscInt j,k,nccs,nccst;
742 
743         for (j=0;j<subset_size;j++) {
744           for (k=j;k<subset_size;k++) {
745             S [k*subset_size+j] = S [j*subset_size+k];
746             St[k*subset_size+j] = St[j*subset_size+k];
747           }
748         }
749         ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,S,&SM);CHKERRQ(ierr);
750         ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,St,&StM);CHKERRQ(ierr);
751         ierr = MatDetectDisconnectedComponents(SM,PETSC_TRUE,&nccs,NULL);CHKERRQ(ierr);
752         ierr = MatDetectDisconnectedComponents(StM,PETSC_TRUE,&nccst,NULL);CHKERRQ(ierr);
753         if (nccs != 1 || nccst != 1) {
754           PetscPrintf(PETSC_COMM_SELF,"[%d] Found disc %d %d (size %d)\n",PetscGlobalRank,nccs,nccst,subset_size);
755         }
756         ierr = MatDestroy(&SM);CHKERRQ(ierr);
757         ierr = MatDestroy(&StM);CHKERRQ(ierr);
758       }
759 
760       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
761         PetscBLASInt B_itype = 1;
762         PetscBLASInt B_IL, B_IU;
763         PetscReal    eps = -1.0; /* dlamch? */
764         PetscInt     nmin_s;
765 
766         if (pcbddc->dbg_flag) {
767           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
768         }
769 
770         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
771         if (thresh > 1.+PETSC_SMALL) {
772 
773           /* ask for eigenvalues larger than thresh */
774 #if defined(PETSC_USE_COMPLEX)
775           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
776 #else
777           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
778 #endif
779         } else {
780           B_IU = PetscMax(1,PetscMin(B_N,nmax));
781           B_IL = 1;
782 #if defined(PETSC_USE_COMPLEX)
783           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
784 #else
785           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
786 #endif
787         }
788         ierr = PetscFPTrapPop();CHKERRQ(ierr);
789         if (B_ierr) {
790           if (B_ierr < 0 ) {
791             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
792           } else if (B_ierr <= B_N) {
793             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
794           } else {
795             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
796           }
797         }
798 
799         if (B_neigs > nmax) {
800           if (pcbddc->dbg_flag) {
801             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
802           }
803           eigs_start = B_neigs -nmax;
804           B_neigs = nmax;
805         }
806 
807         nmin_s = PetscMin(nmin,B_N);
808         if (B_neigs < nmin_s) {
809           PetscBLASInt B_neigs2;
810 
811           B_IU = B_N - B_neigs;
812           B_IL = B_N - nmin_s + 1;
813           if (pcbddc->dbg_flag) {
814             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
815           }
816           if (sub_schurs->is_hermitian) {
817             PetscInt j;
818             for (j=0;j<subset_size;j++) {
819               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
820             }
821             for (j=0;j<subset_size;j++) {
822               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
823             }
824           } else {
825             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
826             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
827           }
828           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
829 #if defined(PETSC_USE_COMPLEX)
830           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
831 #else
832           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
833 #endif
834           ierr = PetscFPTrapPop();CHKERRQ(ierr);
835           B_neigs += B_neigs2;
836         }
837         if (B_ierr) {
838           if (B_ierr < 0 ) {
839             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
840           } else if (B_ierr <= B_N) {
841             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
842           } else {
843             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
844           }
845         }
846         if (pcbddc->dbg_flag) {
847           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
848           for (j=0;j<B_neigs;j++) {
849             if (eigs[j] == 0.0) {
850               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
851             } else {
852               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
853             }
854           }
855         }
856       } else {
857           /* TODO */
858       }
859     }
860     maxneigs = PetscMax(B_neigs,maxneigs);
861     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
862     if (B_neigs) {
863       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
864 
865       if (pcbddc->dbg_flag > 1) {
866         PetscInt ii;
867         for (ii=0;ii<B_neigs;ii++) {
868           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
869           for (j=0;j<B_N;j++) {
870 #if defined(PETSC_USE_COMPLEX)
871             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
872             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
873             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
874 #else
875             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
876 #endif
877           }
878         }
879       }
880 #if 0
881       for (j=0;j<B_neigs;j++) {
882         PetscBLASInt Blas_N,Blas_one = 1.0;
883         PetscScalar norm;
884         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
885         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
886                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
887         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
888           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
889         } else {
890           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
891         }
892         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
893       }
894 #endif
895       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
896       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
897       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
898       cum++;
899     }
900     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
901     /* shift for next computation */
902     cumarray += subset_size*subset_size;
903   }
904   if (pcbddc->dbg_flag) {
905     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
906   }
907 
908   if (mss) {
909     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
910     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
911     /* destroy matrices (junk) */
912     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
913     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
914   }
915   if (allocated_S_St) {
916     ierr = PetscFree2(S,St);CHKERRQ(ierr);
917   }
918   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
919 #if defined(PETSC_USE_COMPLEX)
920   ierr = PetscFree(rwork);CHKERRQ(ierr);
921 #endif
922   if (pcbddc->dbg_flag) {
923     PetscInt maxneigs_r;
924     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
925     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
926   }
927   PetscFunctionReturn(0);
928 }
929 
930 #undef __FUNCT__
931 #define __FUNCT__ "PCBDDCSetUpSolvers"
932 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
933 {
934   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
935   PetscScalar    *coarse_submat_vals;
936   PetscErrorCode ierr;
937 
938   PetscFunctionBegin;
939   /* Setup local scatters R_to_B and (optionally) R_to_D */
940   /* PCBDDCSetUpLocalWorkVectors should be called first! */
941   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
942 
943   /* Setup local neumann solver ksp_R */
944   /* PCBDDCSetUpLocalScatters should be called first! */
945   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
946 
947   /* Change global null space passed in by the user if change of basis has been requested */
948   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
949     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
950   }
951 
952   /*
953      Setup local correction and local part of coarse basis.
954      Gives back the dense local part of the coarse matrix in column major ordering
955   */
956   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
957 
958   /* Compute total number of coarse nodes and setup coarse solver */
959   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
960 
961   /* free */
962   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "PCBDDCResetCustomization"
968 PetscErrorCode PCBDDCResetCustomization(PC pc)
969 {
970   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
975   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
976   ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
977   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
978   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
979   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
980   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
981   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
982   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
983   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
984   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
985   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "PCBDDCResetTopography"
991 PetscErrorCode PCBDDCResetTopography(PC pc)
992 {
993   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
994   PetscInt       i;
995   PetscErrorCode ierr;
996 
997   PetscFunctionBegin;
998   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
999   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1000   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1001   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
1002   for (i=0;i<pcbddc->n_local_subs;i++) {
1003     ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr);
1004   }
1005   ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr);
1006   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "PCBDDCResetSolvers"
1012 PetscErrorCode PCBDDCResetSolvers(PC pc)
1013 {
1014   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1015   PetscScalar    *array;
1016   PetscErrorCode ierr;
1017 
1018   PetscFunctionBegin;
1019   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
1020   if (pcbddc->coarse_phi_B) {
1021     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
1022     ierr = PetscFree(array);CHKERRQ(ierr);
1023   }
1024   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1025   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1026   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1027   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1028   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1029   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1030   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1031   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1032   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1033   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1034   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1035   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1036   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1037   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
1038   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
1039   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
1040   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
1041   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1042   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1043   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1044   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
1045   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
1046   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
1047   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
1048   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
1049   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
1050   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
1051   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
1052   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
1053   ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
1059 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
1060 {
1061   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1062   PC_IS          *pcis = (PC_IS*)pc->data;
1063   VecType        impVecType;
1064   PetscInt       n_constraints,n_R,old_size;
1065   PetscErrorCode ierr;
1066 
1067   PetscFunctionBegin;
1068   if (!pcbddc->ConstraintMatrix) {
1069     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
1070   }
1071   /* get sizes */
1072   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
1073   n_R = pcis->n - pcbddc->n_vertices;
1074   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
1075   /* local work vectors (try to avoid unneeded work)*/
1076   /* R nodes */
1077   old_size = -1;
1078   if (pcbddc->vec1_R) {
1079     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
1080   }
1081   if (n_R != old_size) {
1082     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1083     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1084     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
1085     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
1086     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
1087     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
1088   }
1089   /* local primal dofs */
1090   old_size = -1;
1091   if (pcbddc->vec1_P) {
1092     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
1093   }
1094   if (pcbddc->local_primal_size != old_size) {
1095     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1096     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
1097     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
1098     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
1099   }
1100   /* local explicit constraints */
1101   old_size = -1;
1102   if (pcbddc->vec1_C) {
1103     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
1104   }
1105   if (n_constraints && n_constraints != old_size) {
1106     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1107     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
1108     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
1109     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "PCBDDCSetUpCorrection"
1116 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
1117 {
1118   PetscErrorCode  ierr;
1119   /* pointers to pcis and pcbddc */
1120   PC_IS*          pcis = (PC_IS*)pc->data;
1121   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1122   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1123   /* submatrices of local problem */
1124   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
1125   /* submatrices of benign trick */
1126   Mat             B0_V = NULL;
1127   /* submatrices of local coarse problem */
1128   Mat             S_VV,S_CV,S_VC,S_CC;
1129   /* working matrices */
1130   Mat             C_CR;
1131   /* additional working stuff */
1132   PC              pc_R;
1133   Mat             F;
1134   PetscBool       isLU,isCHOL,isILU;
1135 
1136   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
1137   PetscScalar     *work;
1138   PetscInt        *idx_V_B;
1139   PetscInt        lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
1140   PetscInt        i,n_R,n_D,n_B;
1141 
1142   /* some shortcuts to scalars */
1143   PetscScalar     one=1.0,m_one=-1.0;
1144 
1145   PetscFunctionBegin;
1146   if (!pcbddc->symmetric_primal && pcbddc->benign_n) {
1147     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
1148   }
1149 
1150   /* Set Non-overlapping dimensions */
1151   n_vertices = pcbddc->n_vertices;
1152   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
1153   n_B = pcis->n_B;
1154   n_D = pcis->n - n_B;
1155   n_R = pcis->n - n_vertices;
1156 
1157   /* vertices in boundary numbering */
1158   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
1159   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
1160   if (i != n_vertices) {
1161     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
1162   }
1163 
1164   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
1165   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
1166   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
1167   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
1168   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
1169   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
1170   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
1171   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
1172   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
1173   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
1174 
1175   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
1176   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
1177   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
1178   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
1179   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
1180   lda_rhs = n_R;
1181   if (isLU || isILU || isCHOL) {
1182     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
1183   } else if (sub_schurs->reuse_mumps) {
1184     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1185     MatFactorType type;
1186 
1187     F = reuse_mumps->F;
1188     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
1189     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
1190     ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr);
1191   } else {
1192     F = NULL;
1193   }
1194 
1195   /* allocate workspace */
1196   n = 0;
1197   if (n_constraints) {
1198     n += lda_rhs*n_constraints;
1199   }
1200   if (n_vertices) {
1201     n = PetscMax(2*lda_rhs*n_vertices,n);
1202     n = PetscMax((lda_rhs+n_B)*n_vertices,n);
1203   }
1204   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1205 
1206   /* Precompute stuffs needed for preprocessing and application of BDDC*/
1207   if (n_constraints) {
1208     Mat         M1,M2,M3;
1209     Mat         auxmat;
1210     IS          is_aux;
1211     PetscScalar *array,*array2;
1212 
1213     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1214     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1215 
1216     /* Extract constraints on R nodes: C_{CR}  */
1217     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
1218     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
1219     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
1220 
1221     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
1222     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
1223     ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1224     for (i=0;i<n_constraints;i++) {
1225       const PetscScalar *row_cmat_values;
1226       const PetscInt    *row_cmat_indices;
1227       PetscInt          size_of_constraint,j;
1228 
1229       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1230       for (j=0;j<size_of_constraint;j++) {
1231         work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
1232       }
1233       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1234     }
1235     ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
1236     if (F) {
1237       Mat B;
1238 
1239       ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr);
1240       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
1241       ierr = MatDestroy(&B);CHKERRQ(ierr);
1242     } else {
1243       PetscScalar *marr;
1244 
1245       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1246       for (i=0;i<n_constraints;i++) {
1247         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr);
1248         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr);
1249         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1250         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1251         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1252       }
1253       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1254     }
1255     if (!pcbddc->switch_static) {
1256       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1257       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1258       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1259       for (i=0;i<n_constraints;i++) {
1260         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr);
1261         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
1262         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1265         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1266       }
1267       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1268       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1269       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1270     } else {
1271       if (lda_rhs != n_R) {
1272         IS dummy;
1273 
1274         ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr);
1275         ierr = MatGetSubMatrix(local_auxmat2_R,dummy,dummy,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1276         ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1277       } else {
1278         ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
1279         pcbddc->local_auxmat2 = local_auxmat2_R;
1280       }
1281       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1282     }
1283     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1284     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
1285     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
1286     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
1287     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
1288     if (isCHOL) {
1289       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
1290     } else {
1291       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
1292     }
1293     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
1294     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
1295     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
1296     ierr = MatDestroy(&M2);CHKERRQ(ierr);
1297     ierr = MatDestroy(&M3);CHKERRQ(ierr);
1298     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
1299     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
1300     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
1301     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
1302     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1303   }
1304   /* Get submatrices from subdomain matrix */
1305   if (pcbddc->benign_n) {
1306     IS        dummy;
1307     Mat       B0_R;
1308     PetscReal norm;
1309 
1310     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1311     ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
1312     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
1313     if (norm > PETSC_SMALL) {
1314       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
1315     }
1316     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1317     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1318   }
1319 
1320   if (n_vertices) {
1321     IS is_aux;
1322 
1323     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
1324       IS tis;
1325 
1326       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
1327       ierr = ISSort(tis);CHKERRQ(ierr);
1328       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
1329       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1330     } else {
1331       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
1332     }
1333     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
1334     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
1335     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
1336     if (pcbddc->benign_n) {
1337       IS dummy;
1338 
1339       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1340       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1341       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1342     }
1343     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1344   }
1345 
1346   /* Matrix of coarse basis functions (local) */
1347   if (pcbddc->coarse_phi_B) {
1348     PetscInt on_B,on_primal,on_D=n_D;
1349     if (pcbddc->coarse_phi_D) {
1350       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1351     }
1352     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1353     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1354       PetscScalar *marray;
1355 
1356       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1357       ierr = PetscFree(marray);CHKERRQ(ierr);
1358       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1359       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1360       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1361       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1362     }
1363   }
1364 
1365   if (!pcbddc->coarse_phi_B) {
1366     PetscScalar *marray;
1367 
1368     n = n_B*pcbddc->local_primal_size;
1369     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1370       n += n_D*pcbddc->local_primal_size;
1371     }
1372     if (!pcbddc->symmetric_primal) {
1373       n *= 2;
1374     }
1375     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1376     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1377     n = n_B*pcbddc->local_primal_size;
1378     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1379       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1380       n += n_D*pcbddc->local_primal_size;
1381     }
1382     if (!pcbddc->symmetric_primal) {
1383       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1384       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1385         n = n_B*pcbddc->local_primal_size;
1386         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1387       }
1388     } else {
1389       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1390       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1391       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1392         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1393         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1394       }
1395     }
1396   }
1397 
1398   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1399   p0_lidx_I = NULL;
1400   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1401     const PetscInt *idxs;
1402 
1403     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1404     ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr);
1405     for (i=0;i<pcbddc->benign_n;i++) {
1406       ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr);
1407     }
1408     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1409   }
1410 
1411   /* vertices */
1412   if (n_vertices) {
1413 
1414     ierr = MatConvert(A_VV,MATDENSE,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1415 
1416     if (n_R) {
1417       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1418       PetscBLASInt B_N,B_one = 1;
1419       PetscScalar  *x,*y;
1420       PetscBool    isseqaij;
1421 
1422       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1423       if (lda_rhs == n_R) {
1424         ierr = MatConvert(A_RV,MATDENSE,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1425       } else {
1426         Mat         A_RVt;
1427         PetscScalar *array;
1428 
1429         /* TODO optimize */
1430         ierr = MatConvert(A_RV,MATDENSE,MAT_INITIAL_MATRIX,&A_RVt);CHKERRQ(ierr);
1431         ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1432         ierr = PetscMemzero(work+lda_rhs*n_vertices,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1433         ierr = MatDenseGetArray(A_RVt,&array);CHKERRQ(ierr);
1434         for (i=0;i<n_vertices;i++) {
1435           ierr = PetscMemcpy(work+(n_vertices+i)*lda_rhs,array+i*n_R,n_R*sizeof(PetscScalar));CHKERRQ(ierr);
1436         }
1437         ierr = MatDenseRestoreArray(A_RVt,&array);CHKERRQ(ierr);
1438         ierr = MatDestroy(&A_RVt);CHKERRQ(ierr);
1439         ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&A_RV);CHKERRQ(ierr);
1440       }
1441       ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1442       if (F) { /* TODO could be optimized for symmetric problems */
1443         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1444       } else {
1445         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1446         for (i=0;i<n_vertices;i++) {
1447           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr);
1448           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr);
1449           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1450           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1451           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1452         }
1453         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1454       }
1455       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1456       /* S_VV and S_CV */
1457       if (n_constraints) {
1458         Mat B;
1459 
1460         ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1461         for (i=0;i<n_vertices;i++) {
1462           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr);
1463           ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr);
1464           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1465           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1466           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1467           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1468         }
1469         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr);
1470         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1471         ierr = MatDestroy(&B);CHKERRQ(ierr);
1472         ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr);
1473         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1474         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1475         ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr);
1476         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
1477         ierr = MatDestroy(&B);CHKERRQ(ierr);
1478       }
1479       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1480       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1481         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1482       }
1483       if (lda_rhs != n_R) {
1484         ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1485         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1486         ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr);
1487       }
1488       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1489       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1490       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1491       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1492       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1493       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1494       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1495       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1496       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1497       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1498     } else {
1499       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1500     }
1501     for (i=0;i<pcbddc->benign_n;i++) {
1502       const PetscScalar *vals;
1503       const PetscInt    *idxs;
1504       PetscInt          n,j,primal_idx;
1505 
1506       ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1507       primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i;
1508       for (j=0;j<n;j++) {
1509         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j];
1510         coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j];
1511       }
1512       ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1513     }
1514     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1515 
1516     /* coarse basis functions */
1517     for (i=0;i<n_vertices;i++) {
1518       PetscScalar *y;
1519 
1520       ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr);
1521       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1522       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1523       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1525       y[n_B*i+idx_V_B[i]] = 1.0;
1526       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1527       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1528 
1529       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1530         PetscInt j;
1531 
1532         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1533         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1534         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1537         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1538         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1539       }
1540       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1541     }
1542     /* if n_R == 0 the object is not destroyed */
1543     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1544   }
1545 
1546   if (n_constraints) {
1547     Mat B;
1548 
1549     ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr);
1550     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1551     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1552     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1553     if (n_vertices) {
1554       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1555         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1556       } else {
1557         Mat S_VCt;
1558 
1559         if (lda_rhs != n_R) {
1560           ierr = MatDestroy(&B);CHKERRQ(ierr);
1561           ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B);CHKERRQ(ierr);
1562           ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr);
1563         }
1564         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1565         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1566         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1567       }
1568     }
1569     ierr = MatDestroy(&B);CHKERRQ(ierr);
1570     /* coarse basis functions */
1571     for (i=0;i<n_constraints;i++) {
1572       PetscScalar *y;
1573 
1574       ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr);
1575       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1576       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1577       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1578       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1580       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1581       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1582         PetscInt j;
1583 
1584         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1585         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1586         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1587         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1588         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1589         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1590         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1591       }
1592       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1593     }
1594   }
1595   if (n_constraints) {
1596     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1597   }
1598   ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr);
1599   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1600 
1601   /* compute other basis functions for non-symmetric problems */
1602   if (!pcbddc->symmetric_primal) {
1603     Mat         B_V=NULL,B_C=NULL;
1604     PetscScalar *marray;
1605 
1606     if (n_constraints) {
1607       Mat S_CCT,C_CRT;
1608 
1609       ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr);
1610       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1611       ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1612       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1613       if (n_vertices) {
1614         Mat S_VCT;
1615 
1616         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1617         ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1618         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1619       }
1620       ierr = MatDestroy(&C_CRT);CHKERRQ(ierr);
1621     }
1622     if (n_vertices && n_R) {
1623       PetscScalar    *av,*marray;
1624       const PetscInt *xadj,*adjncy;
1625       PetscInt       n;
1626       PetscBool      flg_row;
1627 
1628       /* B_V = B_V - A_VR^T */
1629       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1630       ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1631       ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr);
1632       ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr);
1633       for (i=0;i<n;i++) {
1634         PetscInt j;
1635         for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
1636       }
1637       ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr);
1638       ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1639       ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1640     }
1641 
1642     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1643     ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr);
1644     for (i=0;i<n_vertices;i++) {
1645       ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr);
1646       ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1647       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1648       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1649       ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1650     }
1651     ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr);
1652     ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr);
1653     for (i=n_vertices;i<n_constraints+n_vertices;i++) {
1654       ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr);
1655       ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1656       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1657       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1658       ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1659     }
1660     ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr);
1661     /* coarse basis functions */
1662     for (i=0;i<pcbddc->local_primal_size;i++) {
1663       PetscScalar *y;
1664 
1665       ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1666       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1667       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1668       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1669       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1670       if (i<n_vertices) {
1671         y[n_B*i+idx_V_B[i]] = 1.0;
1672       }
1673       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1674       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1675 
1676       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1677         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1678         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1679         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1681         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1682         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1683       }
1684       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1685     }
1686     ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1687     ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1688   }
1689   /* free memory */
1690   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1691   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1692   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1693   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1694   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1695   ierr = PetscFree(work);CHKERRQ(ierr);
1696   if (n_vertices) {
1697     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1698   }
1699   if (n_constraints) {
1700     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1701   }
1702   /* Checking coarse_sub_mat and coarse basis functios */
1703   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1704   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1705   if (pcbddc->dbg_flag) {
1706     Mat         coarse_sub_mat;
1707     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1708     Mat         coarse_phi_D,coarse_phi_B;
1709     Mat         coarse_psi_D,coarse_psi_B;
1710     Mat         A_II,A_BB,A_IB,A_BI;
1711     Mat         C_B,CPHI;
1712     IS          is_dummy;
1713     Vec         mones;
1714     MatType     checkmattype=MATSEQAIJ;
1715     PetscReal   real_value;
1716 
1717     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1718     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1719     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1720     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1721     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1722     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1723     if (!pcbddc->symmetric_primal) {
1724       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1725       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1726     }
1727     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1728 
1729     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1730     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1731     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1732     if (!pcbddc->symmetric_primal) {
1733       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1734       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1735       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1736       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1737       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1738       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1739       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1740       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1741       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1742       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1743       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1744       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1745     } else {
1746       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1747       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1748       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1749       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1750       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1751       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1752       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1753       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1754     }
1755     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1756     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1757     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1758     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1759     if (pcbddc->benign_n) {
1760       Mat         B0_I,B0_B,B0_BPHI,B0_IPHI;
1761       PetscScalar *data,*data2;
1762       PetscInt    j;
1763 
1764       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
1765       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1766       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1767       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1768       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1769       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1770       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1771       for (j=0;j<pcbddc->benign_n;j++) {
1772         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
1773         for (i=0;i<pcbddc->local_primal_size;i++) {
1774           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
1775           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
1776         }
1777       }
1778       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1779       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1780       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1781       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1782       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1783       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1784       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1785       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1786       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1787     }
1788 #if 0
1789   {
1790     PetscViewer viewer;
1791     char filename[256];
1792     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
1793     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1794     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1795     ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr);
1796     ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr);
1797     ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr);
1798     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1799     ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr);
1800     ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr);
1801     if (pcbddc->coarse_phi_D) {
1802       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr);
1803       ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr);
1804     }
1805     if (pcbddc->coarse_psi_B) {
1806       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr);
1807       ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr);
1808     }
1809     if (pcbddc->coarse_phi_B) {
1810       ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr);
1811       ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr);
1812     }
1813     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1814   }
1815 #endif
1816     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1817     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1818     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1819     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1820 
1821     /* check constraints */
1822     if (!pcbddc->benign_n) { /* TODO: add benign case */
1823       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1824       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1825       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1826       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1827       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1828       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1829       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1830       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1831       if (!pcbddc->symmetric_primal) {
1832         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1833         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1834         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1835         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1836         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1837       }
1838       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1839       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1840       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1841       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1842     }
1843     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1844     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1845     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1846     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1847     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1848     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1849     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1850     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1851     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1852     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1853     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1854     if (!pcbddc->symmetric_primal) {
1855       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1856       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1857     }
1858     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1859   }
1860   /* get back data */
1861   *coarse_submat_vals_n = coarse_submat_vals;
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1867 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1868 {
1869   Mat            *work_mat;
1870   IS             isrow_s,iscol_s;
1871   PetscBool      rsorted,csorted;
1872   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1873   PetscErrorCode ierr;
1874 
1875   PetscFunctionBegin;
1876   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1877   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1878   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1879   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1880 
1881   if (!rsorted) {
1882     const PetscInt *idxs;
1883     PetscInt *idxs_sorted,i;
1884 
1885     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1886     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1887     for (i=0;i<rsize;i++) {
1888       idxs_perm_r[i] = i;
1889     }
1890     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1891     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1892     for (i=0;i<rsize;i++) {
1893       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1894     }
1895     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1896     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1897   } else {
1898     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1899     isrow_s = isrow;
1900   }
1901 
1902   if (!csorted) {
1903     if (isrow == iscol) {
1904       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1905       iscol_s = isrow_s;
1906     } else {
1907       const PetscInt *idxs;
1908       PetscInt *idxs_sorted,i;
1909 
1910       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1911       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1912       for (i=0;i<csize;i++) {
1913         idxs_perm_c[i] = i;
1914       }
1915       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1916       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1917       for (i=0;i<csize;i++) {
1918         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1919       }
1920       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1921       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1922     }
1923   } else {
1924     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1925     iscol_s = iscol;
1926   }
1927 
1928   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1929 
1930   if (!rsorted || !csorted) {
1931     Mat      new_mat;
1932     IS       is_perm_r,is_perm_c;
1933 
1934     if (!rsorted) {
1935       PetscInt *idxs_r,i;
1936       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1937       for (i=0;i<rsize;i++) {
1938         idxs_r[idxs_perm_r[i]] = i;
1939       }
1940       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1941       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1942     } else {
1943       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1944     }
1945     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1946 
1947     if (!csorted) {
1948       if (isrow_s == iscol_s) {
1949         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1950         is_perm_c = is_perm_r;
1951       } else {
1952         PetscInt *idxs_c,i;
1953         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1954         for (i=0;i<csize;i++) {
1955           idxs_c[idxs_perm_c[i]] = i;
1956         }
1957         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1958         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1959       }
1960     } else {
1961       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1962     }
1963     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1964 
1965     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1966     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1967     work_mat[0] = new_mat;
1968     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1969     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1970   }
1971 
1972   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1973   *B = work_mat[0];
1974   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1975   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1976   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1982 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1983 {
1984   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1985   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1986   Mat            new_mat;
1987   IS             is_local,is_global;
1988   PetscInt       local_size;
1989   PetscBool      isseqaij;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1994   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1995   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1996   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1997   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1998   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1999   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
2000 
2001   /* check */
2002   if (pcbddc->dbg_flag) {
2003     Vec       x,x_change;
2004     PetscReal error;
2005 
2006     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
2007     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2008     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
2009     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2010     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2011     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
2012     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2013     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2014     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2015     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2016     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2017     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
2018     ierr = VecDestroy(&x);CHKERRQ(ierr);
2019     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2020   }
2021 
2022   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
2023   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
2024   if (isseqaij) {
2025     Mat M;
2026 
2027     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
2028     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
2029     ierr = MatDestroy(&M);CHKERRQ(ierr);
2030   } else {
2031     Mat work_mat,M;
2032 
2033     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
2034     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
2035     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
2036     ierr = MatDestroy(&M);CHKERRQ(ierr);
2037     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
2038   }
2039   if (matis->A->symmetric_set) {
2040     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
2041 #if !defined(PETSC_USE_COMPLEX)
2042     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
2043 #endif
2044   }
2045   /*
2046   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2047   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
2048   */
2049   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
2055 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
2056 {
2057   PC_IS*          pcis = (PC_IS*)(pc->data);
2058   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
2059   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2060   PetscInt        *idx_R_local=NULL;
2061   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
2062   PetscInt        vbs,bs;
2063   PetscBT         bitmask=NULL;
2064   PetscErrorCode  ierr;
2065 
2066   PetscFunctionBegin;
2067   /*
2068     No need to setup local scatters if
2069       - primal space is unchanged
2070         AND
2071       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
2072         AND
2073       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
2074   */
2075   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
2076     PetscFunctionReturn(0);
2077   }
2078   /* destroy old objects */
2079   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
2080   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
2081   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
2082   /* Set Non-overlapping dimensions */
2083   n_B = pcis->n_B;
2084   n_D = pcis->n - n_B;
2085   n_vertices = pcbddc->n_vertices;
2086 
2087   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
2088 
2089   /* create auxiliary bitmask and allocate workspace */
2090   if (!sub_schurs->reuse_mumps) {
2091     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
2092     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
2093     for (i=0;i<n_vertices;i++) {
2094       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
2095     }
2096 
2097     for (i=0, n_R=0; i<pcis->n; i++) {
2098       if (!PetscBTLookup(bitmask,i)) {
2099         idx_R_local[n_R++] = i;
2100       }
2101     }
2102   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
2103     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2104 
2105     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2106     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
2107   }
2108 
2109   /* Block code */
2110   vbs = 1;
2111   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
2112   if (bs>1 && !(n_vertices%bs)) {
2113     PetscBool is_blocked = PETSC_TRUE;
2114     PetscInt  *vary;
2115     if (!sub_schurs->reuse_mumps) {
2116       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
2117       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
2118       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
2119       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
2120       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
2121       for (i=0; i<pcis->n/bs; i++) {
2122         if (vary[i]!=0 && vary[i]!=bs) {
2123           is_blocked = PETSC_FALSE;
2124           break;
2125         }
2126       }
2127       ierr = PetscFree(vary);CHKERRQ(ierr);
2128     } else {
2129       /* Verify directly the R set */
2130       for (i=0; i<n_R/bs; i++) {
2131         PetscInt j,node=idx_R_local[bs*i];
2132         for (j=1; j<bs; j++) {
2133           if (node != idx_R_local[bs*i+j]-j) {
2134             is_blocked = PETSC_FALSE;
2135             break;
2136           }
2137         }
2138       }
2139     }
2140     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
2141       vbs = bs;
2142       for (i=0;i<n_R/vbs;i++) {
2143         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
2144       }
2145     }
2146   }
2147   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
2148   if (sub_schurs->reuse_mumps) {
2149     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2150 
2151     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2152     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
2153     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
2154     reuse_mumps->is_R = pcbddc->is_R_local;
2155   } else {
2156     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
2157   }
2158 
2159   /* print some info if requested */
2160   if (pcbddc->dbg_flag) {
2161     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2162     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2163     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2164     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
2165     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
2166     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr);
2167     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2168   }
2169 
2170   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
2171   if (!sub_schurs->reuse_mumps) {
2172     IS       is_aux1,is_aux2;
2173     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
2174 
2175     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2176     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
2177     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
2178     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2179     for (i=0; i<n_D; i++) {
2180       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
2181     }
2182     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2183     for (i=0, j=0; i<n_R; i++) {
2184       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
2185         aux_array1[j++] = i;
2186       }
2187     }
2188     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2189     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2190     for (i=0, j=0; i<n_B; i++) {
2191       if (!PetscBTLookup(bitmask,is_indices[i])) {
2192         aux_array2[j++] = i;
2193       }
2194     }
2195     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2196     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
2197     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
2198     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2199     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
2200 
2201     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2202       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
2203       for (i=0, j=0; i<n_R; i++) {
2204         if (PetscBTLookup(bitmask,idx_R_local[i])) {
2205           aux_array1[j++] = i;
2206         }
2207       }
2208       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2209       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2210       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2211     }
2212     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
2213     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2214   } else {
2215     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2216     IS               tis;
2217     PetscInt         schur_size;
2218 
2219     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
2220     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
2221     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
2222     ierr = ISDestroy(&tis);CHKERRQ(ierr);
2223     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2224       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
2225       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2226       ierr = ISDestroy(&tis);CHKERRQ(ierr);
2227     }
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
2235 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
2236 {
2237   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2238   PC_IS          *pcis = (PC_IS*)pc->data;
2239   PC             pc_temp;
2240   Mat            A_RR;
2241   MatReuse       reuse;
2242   PetscScalar    m_one = -1.0;
2243   PetscReal      value;
2244   PetscInt       n_D,n_R;
2245   PetscBool      use_exact,use_exact_reduced,issbaij;
2246   PetscErrorCode ierr;
2247   /* prefixes stuff */
2248   char           dir_prefix[256],neu_prefix[256],str_level[16];
2249   size_t         len;
2250 
2251   PetscFunctionBegin;
2252 
2253   /* compute prefixes */
2254   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
2255   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
2256   if (!pcbddc->current_level) {
2257     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2258     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2259     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2260     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2261   } else {
2262     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2263     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
2264     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2265     len -= 15; /* remove "pc_bddc_coarse_" */
2266     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
2267     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
2268     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2269     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2270     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2271     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2272     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
2273     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
2274   }
2275 
2276   /* DIRICHLET PROBLEM */
2277   if (dirichlet) {
2278     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2279     if (pcbddc->local_mat->symmetric_set) {
2280       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2281     }
2282     /* Matrix for Dirichlet problem is pcis->A_II */
2283     n_D = pcis->n - pcis->n_B;
2284     if (!pcbddc->ksp_D) { /* create object if not yet build */
2285       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
2286       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
2287       /* default */
2288       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
2289       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
2290       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2291       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2292       if (issbaij) {
2293         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2294       } else {
2295         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2296       }
2297       /* Allow user's customization */
2298       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
2299       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2300     }
2301     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
2302     if (sub_schurs->reuse_mumps) {
2303       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2304 
2305       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
2306     }
2307     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2308     if (!n_D) {
2309       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2310       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2311     }
2312     /* Set Up KSP for Dirichlet problem of BDDC */
2313     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
2314     /* set ksp_D into pcis data */
2315     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
2316     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
2317     pcis->ksp_D = pcbddc->ksp_D;
2318   }
2319 
2320   /* NEUMANN PROBLEM */
2321   A_RR = 0;
2322   if (neumann) {
2323     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2324     PetscInt        ibs,mbs;
2325     PetscBool       issbaij;
2326     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
2327     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
2328     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
2329     if (pcbddc->ksp_R) { /* already created ksp */
2330       PetscInt nn_R;
2331       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
2332       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2333       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
2334       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
2335         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
2336         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2337         reuse = MAT_INITIAL_MATRIX;
2338       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
2339         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
2340           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2341           reuse = MAT_INITIAL_MATRIX;
2342         } else { /* safe to reuse the matrix */
2343           reuse = MAT_REUSE_MATRIX;
2344         }
2345       }
2346       /* last check */
2347       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
2348         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2349         reuse = MAT_INITIAL_MATRIX;
2350       }
2351     } else { /* first time, so we need to create the matrix */
2352       reuse = MAT_INITIAL_MATRIX;
2353     }
2354     /* extract A_RR */
2355     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
2356     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
2357     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2358     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
2359       if (matis->A == pcbddc->local_mat) {
2360         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2361         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2362       } else {
2363         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2364       }
2365     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
2366       if (matis->A == pcbddc->local_mat) {
2367         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2368         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2369       } else {
2370         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2371       }
2372     }
2373     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
2374     if (pcbddc->local_mat->symmetric_set) {
2375       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2376     }
2377     if (!pcbddc->ksp_R) { /* create object if not present */
2378       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
2379       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
2380       /* default */
2381       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
2382       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
2383       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2384       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2385       if (issbaij) {
2386         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2387       } else {
2388         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2389       }
2390       /* Allow user's customization */
2391       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
2392       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2393     }
2394     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2395     if (!n_R) {
2396       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2397       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2398     }
2399     /* Reuse MUMPS solver if it is present */
2400     if (sub_schurs->reuse_mumps) {
2401       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2402 
2403       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2404     }
2405     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2406     /* Set Up KSP for Neumann problem of BDDC */
2407     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2408   }
2409   /* free Neumann problem's matrix */
2410   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2411 
2412   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2413   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2414     if (pcbddc->dbg_flag) {
2415       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2416       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2417       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2418     }
2419     if (dirichlet) { /* Dirichlet */
2420       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2421       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2422       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2423       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2424       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2425       /* need to be adapted? */
2426       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2427       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2428       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2429       /* print info */
2430       if (pcbddc->dbg_flag) {
2431         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2432         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2433       }
2434       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2435         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2436       }
2437     }
2438     if (neumann) { /* Neumann */
2439       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2440       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2441       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2442       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2443       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2444       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2445       /* need to be adapted? */
2446       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2447       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2448       /* print info */
2449       if (pcbddc->dbg_flag) {
2450         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2451         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2452       }
2453       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2454         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2455       }
2456     }
2457   }
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2463 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2464 {
2465   PetscErrorCode  ierr;
2466   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2467   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2468 
2469   PetscFunctionBegin;
2470   if (!sub_schurs->reuse_mumps) {
2471     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2472   }
2473   if (!pcbddc->switch_static) {
2474     if (applytranspose && pcbddc->local_auxmat1) {
2475       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2476       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2477     }
2478     if (!sub_schurs->reuse_mumps) {
2479       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2480       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2481     } else {
2482       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2483 
2484       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2485       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2486     }
2487   } else {
2488     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2489     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2490     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2491     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2492     if (applytranspose && pcbddc->local_auxmat1) {
2493       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2494       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2495       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2496       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2497     }
2498   }
2499   if (!sub_schurs->reuse_mumps) {
2500     if (applytranspose) {
2501       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2502     } else {
2503       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2504     }
2505 #if defined(PETSC_HAVE_MUMPS)
2506   } else {
2507     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2508 
2509     if (applytranspose) {
2510       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2511     } else {
2512       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2513     }
2514 #endif
2515   }
2516   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2517   if (!pcbddc->switch_static) {
2518     if (!sub_schurs->reuse_mumps) {
2519       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2520       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2521     } else {
2522       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2523 
2524       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2525       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2526     }
2527     if (!applytranspose && pcbddc->local_auxmat1) {
2528       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2529       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2530     }
2531   } else {
2532     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2533     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2534     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2535     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2536     if (!applytranspose && pcbddc->local_auxmat1) {
2537       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2538       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2539     }
2540     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2541     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2542     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2543     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2544   }
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2549 #undef __FUNCT__
2550 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2551 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2552 {
2553   PetscErrorCode ierr;
2554   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2555   PC_IS*            pcis = (PC_IS*)  (pc->data);
2556   const PetscScalar zero = 0.0;
2557 
2558   PetscFunctionBegin;
2559   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2560   if (applytranspose) {
2561     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2562     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2563   } else {
2564     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2565     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2566   }
2567 
2568   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2569   if (pcbddc->benign_n) {
2570     PetscScalar *array;
2571     PetscInt    j;
2572 
2573     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2574     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
2575     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2576   }
2577 
2578   /* start communications from local primal nodes to rhs of coarse solver */
2579   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2580   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2581   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2582 
2583   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2584   /* TODO remove null space when doing multilevel */
2585   if (pcbddc->coarse_ksp) {
2586     Mat coarse_mat;
2587     Vec rhs,sol;
2588     MatNullSpace nullsp;
2589 
2590     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2591     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2592     ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
2593     ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr);
2594     if (nullsp) {
2595       ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr);
2596     }
2597     if (applytranspose) {
2598       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2599     } else {
2600       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2601     }
2602     if (nullsp) {
2603       ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr);
2604     }
2605   }
2606 
2607   /* Local solution on R nodes */
2608   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2609     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2610   }
2611 
2612   /* communications from coarse sol to local primal nodes */
2613   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2614   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2615 
2616   /* Sum contributions from two levels */
2617   if (applytranspose) {
2618     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2619     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2620   } else {
2621     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2622     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2623   }
2624   /* store p0 */
2625   if (pcbddc->benign_n) {
2626     PetscScalar *array;
2627     PetscInt    j;
2628 
2629     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2630     for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
2631     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2632   }
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 #undef __FUNCT__
2637 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2638 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2639 {
2640   PetscErrorCode ierr;
2641   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2642   PetscScalar    *array;
2643   Vec            from,to;
2644 
2645   PetscFunctionBegin;
2646   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2647     from = pcbddc->coarse_vec;
2648     to = pcbddc->vec1_P;
2649     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2650       Vec tvec;
2651 
2652       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2653       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2654       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2655       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2656       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2657       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2658     }
2659   } else { /* from local to global -> put data in coarse right hand side */
2660     from = pcbddc->vec1_P;
2661     to = pcbddc->coarse_vec;
2662   }
2663   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2669 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2670 {
2671   PetscErrorCode ierr;
2672   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2673   PetscScalar    *array;
2674   Vec            from,to;
2675 
2676   PetscFunctionBegin;
2677   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2678     from = pcbddc->coarse_vec;
2679     to = pcbddc->vec1_P;
2680   } else { /* from local to global -> put data in coarse right hand side */
2681     from = pcbddc->vec1_P;
2682     to = pcbddc->coarse_vec;
2683   }
2684   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2685   if (smode == SCATTER_FORWARD) {
2686     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2687       Vec tvec;
2688 
2689       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2690       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2691       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2692       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2693     }
2694   } else {
2695     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2696      ierr = VecResetArray(from);CHKERRQ(ierr);
2697     }
2698   }
2699   PetscFunctionReturn(0);
2700 }
2701 
2702 /* uncomment for testing purposes */
2703 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2704 #undef __FUNCT__
2705 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2706 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2707 {
2708   PetscErrorCode    ierr;
2709   PC_IS*            pcis = (PC_IS*)(pc->data);
2710   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2711   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2712   /* one and zero */
2713   PetscScalar       one=1.0,zero=0.0;
2714   /* space to store constraints and their local indices */
2715   PetscScalar       *constraints_data;
2716   PetscInt          *constraints_idxs,*constraints_idxs_B;
2717   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2718   PetscInt          *constraints_n;
2719   /* iterators */
2720   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2721   /* BLAS integers */
2722   PetscBLASInt      lwork,lierr;
2723   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2724   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2725   /* reuse */
2726   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2727   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2728   /* change of basis */
2729   PetscBool         qr_needed;
2730   PetscBT           change_basis,qr_needed_idx;
2731   /* auxiliary stuff */
2732   PetscInt          *nnz,*is_indices;
2733   PetscInt          ncc;
2734   /* some quantities */
2735   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2736   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2737 
2738   PetscFunctionBegin;
2739   /* Destroy Mat objects computed previously */
2740   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2741   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2742   /* save info on constraints from previous setup (if any) */
2743   olocal_primal_size = pcbddc->local_primal_size;
2744   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2745   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2746   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2747   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2748   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2749   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2750 
2751   /* print some info */
2752   if (pcbddc->dbg_flag) {
2753     IS       vertices;
2754     PetscInt nv,nedges,nfaces;
2755     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2756     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2757     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2758     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2759     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2760     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2761     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2762     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2763     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2764   }
2765 
2766   if (!pcbddc->adaptive_selection) {
2767     IS           ISForVertices,*ISForFaces,*ISForEdges;
2768     MatNullSpace nearnullsp;
2769     const Vec    *nearnullvecs;
2770     Vec          *localnearnullsp;
2771     PetscScalar  *array;
2772     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2773     PetscBool    nnsp_has_cnst;
2774     /* LAPACK working arrays for SVD or POD */
2775     PetscBool    skip_lapack,boolforchange;
2776     PetscScalar  *work;
2777     PetscReal    *singular_vals;
2778 #if defined(PETSC_USE_COMPLEX)
2779     PetscReal    *rwork;
2780 #endif
2781 #if defined(PETSC_MISSING_LAPACK_GESVD)
2782     PetscScalar  *temp_basis,*correlation_mat;
2783 #else
2784     PetscBLASInt dummy_int=1;
2785     PetscScalar  dummy_scalar=1.;
2786 #endif
2787 
2788     /* Get index sets for faces, edges and vertices from graph */
2789     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2790     /* free unneeded index sets */
2791     if (!pcbddc->use_vertices) {
2792       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2793     }
2794     if (!pcbddc->use_edges) {
2795       for (i=0;i<n_ISForEdges;i++) {
2796         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2797       }
2798       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2799       n_ISForEdges = 0;
2800     }
2801     if (!pcbddc->use_faces) {
2802       for (i=0;i<n_ISForFaces;i++) {
2803         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2804       }
2805       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2806       n_ISForFaces = 0;
2807     }
2808 
2809 #if defined(PETSC_USE_DEBUG)
2810     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2811        Also use_change_of_basis should be consistent among processors */
2812     if (pcbddc->NullSpace) {
2813       PetscBool tbool[2],gbool[2];
2814 
2815       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2816         pcbddc->use_change_of_basis = PETSC_TRUE;
2817         if (!ISForEdges) {
2818           pcbddc->use_change_on_faces = PETSC_TRUE;
2819         }
2820       }
2821       tbool[0] = pcbddc->use_change_of_basis;
2822       tbool[1] = pcbddc->use_change_on_faces;
2823       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2824       pcbddc->use_change_of_basis = gbool[0];
2825       pcbddc->use_change_on_faces = gbool[1];
2826     }
2827 #endif
2828 
2829     /* check if near null space is attached to global mat */
2830     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2831     if (nearnullsp) {
2832       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2833       /* remove any stored info */
2834       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2835       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2836       /* store information for BDDC solver reuse */
2837       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2838       pcbddc->onearnullspace = nearnullsp;
2839       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2840       for (i=0;i<nnsp_size;i++) {
2841         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2842       }
2843     } else { /* if near null space is not provided BDDC uses constants by default */
2844       nnsp_size = 0;
2845       nnsp_has_cnst = PETSC_TRUE;
2846     }
2847     /* get max number of constraints on a single cc */
2848     max_constraints = nnsp_size;
2849     if (nnsp_has_cnst) max_constraints++;
2850 
2851     /*
2852          Evaluate maximum storage size needed by the procedure
2853          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2854          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2855          There can be multiple constraints per connected component
2856                                                                                                                                                            */
2857     n_vertices = 0;
2858     if (ISForVertices) {
2859       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2860     }
2861     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2862     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2863 
2864     total_counts = n_ISForFaces+n_ISForEdges;
2865     total_counts *= max_constraints;
2866     total_counts += n_vertices;
2867     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2868 
2869     total_counts = 0;
2870     max_size_of_constraint = 0;
2871     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2872       IS used_is;
2873       if (i<n_ISForEdges) {
2874         used_is = ISForEdges[i];
2875       } else {
2876         used_is = ISForFaces[i-n_ISForEdges];
2877       }
2878       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2879       total_counts += j;
2880       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2881     }
2882     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2883 
2884     /* get local part of global near null space vectors */
2885     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2886     for (k=0;k<nnsp_size;k++) {
2887       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2888       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2889       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2890     }
2891 
2892     /* whether or not to skip lapack calls */
2893     skip_lapack = PETSC_TRUE;
2894     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2895 
2896     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2897     if (!skip_lapack) {
2898       PetscScalar temp_work;
2899 
2900 #if defined(PETSC_MISSING_LAPACK_GESVD)
2901       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2902       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2903       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2904       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2905 #if defined(PETSC_USE_COMPLEX)
2906       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2907 #endif
2908       /* now we evaluate the optimal workspace using query with lwork=-1 */
2909       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2910       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2911       lwork = -1;
2912       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2913 #if !defined(PETSC_USE_COMPLEX)
2914       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2915 #else
2916       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2917 #endif
2918       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2919       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2920 #else /* on missing GESVD */
2921       /* SVD */
2922       PetscInt max_n,min_n;
2923       max_n = max_size_of_constraint;
2924       min_n = max_constraints;
2925       if (max_size_of_constraint < max_constraints) {
2926         min_n = max_size_of_constraint;
2927         max_n = max_constraints;
2928       }
2929       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2930 #if defined(PETSC_USE_COMPLEX)
2931       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2932 #endif
2933       /* now we evaluate the optimal workspace using query with lwork=-1 */
2934       lwork = -1;
2935       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2936       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2937       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2938       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2939 #if !defined(PETSC_USE_COMPLEX)
2940       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2941 #else
2942       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2943 #endif
2944       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2945       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2946 #endif /* on missing GESVD */
2947       /* Allocate optimal workspace */
2948       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2949       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2950     }
2951     /* Now we can loop on constraining sets */
2952     total_counts = 0;
2953     constraints_idxs_ptr[0] = 0;
2954     constraints_data_ptr[0] = 0;
2955     /* vertices */
2956     if (n_vertices) {
2957       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2958       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2959       for (i=0;i<n_vertices;i++) {
2960         constraints_n[total_counts] = 1;
2961         constraints_data[total_counts] = 1.0;
2962         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2963         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2964         total_counts++;
2965       }
2966       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2967       n_vertices = total_counts;
2968     }
2969 
2970     /* edges and faces */
2971     total_counts_cc = total_counts;
2972     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2973       IS        used_is;
2974       PetscBool idxs_copied = PETSC_FALSE;
2975 
2976       if (ncc<n_ISForEdges) {
2977         used_is = ISForEdges[ncc];
2978         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2979       } else {
2980         used_is = ISForFaces[ncc-n_ISForEdges];
2981         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2982       }
2983       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2984 
2985       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2986       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2987       /* change of basis should not be performed on local periodic nodes */
2988       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2989       if (nnsp_has_cnst) {
2990         PetscScalar quad_value;
2991 
2992         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2993         idxs_copied = PETSC_TRUE;
2994 
2995         if (!pcbddc->use_nnsp_true) {
2996           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2997         } else {
2998           quad_value = 1.0;
2999         }
3000         for (j=0;j<size_of_constraint;j++) {
3001           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
3002         }
3003         temp_constraints++;
3004         total_counts++;
3005       }
3006       for (k=0;k<nnsp_size;k++) {
3007         PetscReal real_value;
3008         PetscScalar *ptr_to_data;
3009 
3010         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
3011         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
3012         for (j=0;j<size_of_constraint;j++) {
3013           ptr_to_data[j] = array[is_indices[j]];
3014         }
3015         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
3016         /* check if array is null on the connected component */
3017         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3018         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
3019         if (real_value > 0.0) { /* keep indices and values */
3020           temp_constraints++;
3021           total_counts++;
3022           if (!idxs_copied) {
3023             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
3024             idxs_copied = PETSC_TRUE;
3025           }
3026         }
3027       }
3028       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3029       valid_constraints = temp_constraints;
3030       if (!pcbddc->use_nnsp_true && temp_constraints) {
3031         if (temp_constraints == 1) { /* just normalize the constraint */
3032           PetscScalar norm,*ptr_to_data;
3033 
3034           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
3035           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3036           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
3037           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
3038           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
3039         } else { /* perform SVD */
3040           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
3041           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
3042 
3043 #if defined(PETSC_MISSING_LAPACK_GESVD)
3044           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
3045              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
3046              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
3047                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
3048                 from that computed using LAPACKgesvd
3049              -> This is due to a different computation of eigenvectors in LAPACKheev
3050              -> The quality of the POD-computed basis will be the same */
3051           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
3052           /* Store upper triangular part of correlation matrix */
3053           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3054           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3055           for (j=0;j<temp_constraints;j++) {
3056             for (k=0;k<j+1;k++) {
3057               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
3058             }
3059           }
3060           /* compute eigenvalues and eigenvectors of correlation matrix */
3061           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3062           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
3063 #if !defined(PETSC_USE_COMPLEX)
3064           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
3065 #else
3066           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
3067 #endif
3068           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3069           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
3070           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
3071           j = 0;
3072           while (j < temp_constraints && singular_vals[j] < tol) j++;
3073           total_counts = total_counts-j;
3074           valid_constraints = temp_constraints-j;
3075           /* scale and copy POD basis into used quadrature memory */
3076           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3077           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3078           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
3079           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3080           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
3081           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3082           if (j<temp_constraints) {
3083             PetscInt ii;
3084             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
3085             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3086             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
3087             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3088             for (k=0;k<temp_constraints-j;k++) {
3089               for (ii=0;ii<size_of_constraint;ii++) {
3090                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
3091               }
3092             }
3093           }
3094 #else  /* on missing GESVD */
3095           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3096           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3097           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3098           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3099 #if !defined(PETSC_USE_COMPLEX)
3100           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
3101 #else
3102           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
3103 #endif
3104           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
3105           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3106           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
3107           k = temp_constraints;
3108           if (k > size_of_constraint) k = size_of_constraint;
3109           j = 0;
3110           while (j < k && singular_vals[k-j-1] < tol) j++;
3111           valid_constraints = k-j;
3112           total_counts = total_counts-temp_constraints+valid_constraints;
3113 #endif /* on missing GESVD */
3114         }
3115       }
3116       /* update pointers information */
3117       if (valid_constraints) {
3118         constraints_n[total_counts_cc] = valid_constraints;
3119         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
3120         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
3121         /* set change_of_basis flag */
3122         if (boolforchange) {
3123           PetscBTSet(change_basis,total_counts_cc);
3124         }
3125         total_counts_cc++;
3126       }
3127     }
3128     /* free workspace */
3129     if (!skip_lapack) {
3130       ierr = PetscFree(work);CHKERRQ(ierr);
3131 #if defined(PETSC_USE_COMPLEX)
3132       ierr = PetscFree(rwork);CHKERRQ(ierr);
3133 #endif
3134       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
3135 #if defined(PETSC_MISSING_LAPACK_GESVD)
3136       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
3137       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
3138 #endif
3139     }
3140     for (k=0;k<nnsp_size;k++) {
3141       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
3142     }
3143     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
3144     /* free index sets of faces, edges and vertices */
3145     for (i=0;i<n_ISForFaces;i++) {
3146       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
3147     }
3148     if (n_ISForFaces) {
3149       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
3150     }
3151     for (i=0;i<n_ISForEdges;i++) {
3152       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
3153     }
3154     if (n_ISForEdges) {
3155       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
3156     }
3157     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
3158   } else {
3159     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3160 
3161     total_counts = 0;
3162     n_vertices = 0;
3163     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3164       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
3165     }
3166     max_constraints = 0;
3167     total_counts_cc = 0;
3168     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3169       total_counts += pcbddc->adaptive_constraints_n[i];
3170       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
3171       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
3172     }
3173     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
3174     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
3175     constraints_idxs = pcbddc->adaptive_constraints_idxs;
3176     constraints_data = pcbddc->adaptive_constraints_data;
3177     /* constraints_n differs from pcbddc->adaptive_constraints_n */
3178     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
3179     total_counts_cc = 0;
3180     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3181       if (pcbddc->adaptive_constraints_n[i]) {
3182         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
3183       }
3184     }
3185 #if 0
3186     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
3187     for (i=0;i<total_counts_cc;i++) {
3188       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
3189       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
3190       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
3191         printf(" %d",constraints_idxs[j]);
3192       }
3193       printf("\n");
3194       printf("number of cc: %d\n",constraints_n[i]);
3195     }
3196     for (i=0;i<n_vertices;i++) {
3197       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
3198     }
3199     for (i=0;i<sub_schurs->n_subs;i++) {
3200       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
3201     }
3202 #endif
3203 
3204     max_size_of_constraint = 0;
3205     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
3206     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
3207     /* Change of basis */
3208     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
3209     if (pcbddc->use_change_of_basis) {
3210       for (i=0;i<sub_schurs->n_subs;i++) {
3211         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
3212           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
3213         }
3214       }
3215     }
3216   }
3217   pcbddc->local_primal_size = total_counts;
3218   ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3219 
3220   /* map constraints_idxs in boundary numbering */
3221   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
3222   if (i != constraints_idxs_ptr[total_counts_cc]) {
3223     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
3224   }
3225 
3226   /* Create constraint matrix */
3227   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
3228   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
3229   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
3230 
3231   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
3232   /* determine if a QR strategy is needed for change of basis */
3233   qr_needed = PETSC_FALSE;
3234   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
3235   total_primal_vertices=0;
3236   pcbddc->local_primal_size_cc = 0;
3237   for (i=0;i<total_counts_cc;i++) {
3238     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3239     if (size_of_constraint == 1) {
3240       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
3241       pcbddc->local_primal_size_cc += 1;
3242     } else if (PetscBTLookup(change_basis,i)) {
3243       for (k=0;k<constraints_n[i];k++) {
3244         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3245       }
3246       pcbddc->local_primal_size_cc += constraints_n[i];
3247       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
3248         PetscBTSet(qr_needed_idx,i);
3249         qr_needed = PETSC_TRUE;
3250       }
3251     } else {
3252       pcbddc->local_primal_size_cc += 1;
3253     }
3254   }
3255   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
3256   pcbddc->n_vertices = total_primal_vertices;
3257   /* permute indices in order to have a sorted set of vertices */
3258   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3259 
3260   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
3261   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
3262   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
3263 
3264   /* nonzero structure of constraint matrix */
3265   /* and get reference dof for local constraints */
3266   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
3267   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
3268 
3269   j = total_primal_vertices;
3270   total_counts = total_primal_vertices;
3271   cum = total_primal_vertices;
3272   for (i=n_vertices;i<total_counts_cc;i++) {
3273     if (!PetscBTLookup(change_basis,i)) {
3274       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
3275       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
3276       cum++;
3277       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3278       for (k=0;k<constraints_n[i];k++) {
3279         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3280         nnz[j+k] = size_of_constraint;
3281       }
3282       j += constraints_n[i];
3283     }
3284   }
3285   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
3286   ierr = PetscFree(nnz);CHKERRQ(ierr);
3287 
3288   /* set values in constraint matrix */
3289   for (i=0;i<total_primal_vertices;i++) {
3290     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3291   }
3292   total_counts = total_primal_vertices;
3293   for (i=n_vertices;i<total_counts_cc;i++) {
3294     if (!PetscBTLookup(change_basis,i)) {
3295       PetscInt *cols;
3296 
3297       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3298       cols = constraints_idxs+constraints_idxs_ptr[i];
3299       for (k=0;k<constraints_n[i];k++) {
3300         PetscInt    row = total_counts+k;
3301         PetscScalar *vals;
3302 
3303         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
3304         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3305       }
3306       total_counts += constraints_n[i];
3307     }
3308   }
3309   /* assembling */
3310   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3311   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3312 
3313   /*
3314   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3315   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
3316   */
3317   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
3318   if (pcbddc->use_change_of_basis) {
3319     /* dual and primal dofs on a single cc */
3320     PetscInt     dual_dofs,primal_dofs;
3321     /* working stuff for GEQRF */
3322     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
3323     PetscBLASInt lqr_work;
3324     /* working stuff for UNGQR */
3325     PetscScalar  *gqr_work,lgqr_work_t;
3326     PetscBLASInt lgqr_work;
3327     /* working stuff for TRTRS */
3328     PetscScalar  *trs_rhs;
3329     PetscBLASInt Blas_NRHS;
3330     /* pointers for values insertion into change of basis matrix */
3331     PetscInt     *start_rows,*start_cols;
3332     PetscScalar  *start_vals;
3333     /* working stuff for values insertion */
3334     PetscBT      is_primal;
3335     PetscInt     *aux_primal_numbering_B;
3336     /* matrix sizes */
3337     PetscInt     global_size,local_size;
3338     /* temporary change of basis */
3339     Mat          localChangeOfBasisMatrix;
3340     /* extra space for debugging */
3341     PetscScalar  *dbg_work;
3342 
3343     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
3344     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
3345     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3346     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
3347     /* nonzeros for local mat */
3348     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
3349     for (i=0;i<pcis->n;i++) nnz[i]=1;
3350     for (i=n_vertices;i<total_counts_cc;i++) {
3351       if (PetscBTLookup(change_basis,i)) {
3352         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3353         if (PetscBTLookup(qr_needed_idx,i)) {
3354           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
3355         } else {
3356           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
3357           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
3358         }
3359       }
3360     }
3361     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
3362     ierr = PetscFree(nnz);CHKERRQ(ierr);
3363     /* Set initial identity in the matrix */
3364     for (i=0;i<pcis->n;i++) {
3365       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
3366     }
3367 
3368     if (pcbddc->dbg_flag) {
3369       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
3370       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3371     }
3372 
3373 
3374     /* Now we loop on the constraints which need a change of basis */
3375     /*
3376        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
3377        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
3378 
3379        Basic blocks of change of basis matrix T computed by
3380 
3381           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
3382 
3383             | 1        0   ...        0         s_1/S |
3384             | 0        1   ...        0         s_2/S |
3385             |              ...                        |
3386             | 0        ...            1     s_{n-1}/S |
3387             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
3388 
3389             with S = \sum_{i=1}^n s_i^2
3390             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
3391                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
3392 
3393           - QR decomposition of constraints otherwise
3394     */
3395     if (qr_needed) {
3396       /* space to store Q */
3397       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
3398       /* first we issue queries for optimal work */
3399       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3400       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
3401       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3402       lqr_work = -1;
3403       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
3404       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3405       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3406       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3407       lgqr_work = -1;
3408       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3409       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3410       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3411       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3412       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3413       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3414       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3415       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3416       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3417       /* array to store scaling factors for reflectors */
3418       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3419       /* array to store rhs and solution of triangular solver */
3420       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3421       /* allocating workspace for check */
3422       if (pcbddc->dbg_flag) {
3423         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3424       }
3425     }
3426     /* array to store whether a node is primal or not */
3427     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3428     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3429     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3430     if (i != total_primal_vertices) {
3431       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3432     }
3433     for (i=0;i<total_primal_vertices;i++) {
3434       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3435     }
3436     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3437 
3438     /* loop on constraints and see whether or not they need a change of basis and compute it */
3439     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3440       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3441       if (PetscBTLookup(change_basis,total_counts)) {
3442         /* get constraint info */
3443         primal_dofs = constraints_n[total_counts];
3444         dual_dofs = size_of_constraint-primal_dofs;
3445 
3446         if (pcbddc->dbg_flag) {
3447           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3448         }
3449 
3450         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3451 
3452           /* copy quadrature constraints for change of basis check */
3453           if (pcbddc->dbg_flag) {
3454             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3455           }
3456           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3457           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3458 
3459           /* compute QR decomposition of constraints */
3460           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3461           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3462           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3463           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3464           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3465           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3466           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3467 
3468           /* explictly compute R^-T */
3469           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3470           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3471           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3472           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3473           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3474           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3475           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3476           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3477           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3478           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3479 
3480           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3481           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3482           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3483           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3484           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3485           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3486           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3487           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3488           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3489 
3490           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3491              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3492              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3493           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3494           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3495           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3496           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3497           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3498           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3499           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3500           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3501           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3502           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3503 
3504           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3505           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3506           /* insert cols for primal dofs */
3507           for (j=0;j<primal_dofs;j++) {
3508             start_vals = &qr_basis[j*size_of_constraint];
3509             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3510             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3511           }
3512           /* insert cols for dual dofs */
3513           for (j=0,k=0;j<dual_dofs;k++) {
3514             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3515               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3516               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3517               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3518               j++;
3519             }
3520           }
3521 
3522           /* check change of basis */
3523           if (pcbddc->dbg_flag) {
3524             PetscInt   ii,jj;
3525             PetscBool valid_qr=PETSC_TRUE;
3526             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3527             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3528             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3529             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3530             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3531             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3532             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3533             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3534             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3535             for (jj=0;jj<size_of_constraint;jj++) {
3536               for (ii=0;ii<primal_dofs;ii++) {
3537                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3538                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3539               }
3540             }
3541             if (!valid_qr) {
3542               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3543               for (jj=0;jj<size_of_constraint;jj++) {
3544                 for (ii=0;ii<primal_dofs;ii++) {
3545                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3546                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3547                   }
3548                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3549                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3550                   }
3551                 }
3552               }
3553             } else {
3554               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3555             }
3556           }
3557         } else { /* simple transformation block */
3558           PetscInt    row,col;
3559           PetscScalar val,norm;
3560 
3561           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3562           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3563           for (j=0;j<size_of_constraint;j++) {
3564             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3565             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3566             if (!PetscBTLookup(is_primal,row_B)) {
3567               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3568               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3569               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3570             } else {
3571               for (k=0;k<size_of_constraint;k++) {
3572                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3573                 if (row != col) {
3574                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3575                 } else {
3576                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3577                 }
3578                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3579               }
3580             }
3581           }
3582           if (pcbddc->dbg_flag) {
3583             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3584           }
3585         }
3586       } else {
3587         if (pcbddc->dbg_flag) {
3588           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3589         }
3590       }
3591     }
3592 
3593     /* free workspace */
3594     if (qr_needed) {
3595       if (pcbddc->dbg_flag) {
3596         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3597       }
3598       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3599       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3600       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3601       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3602       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3603     }
3604     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3605     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607 
3608     /* assembling of global change of variable */
3609     {
3610       Mat      tmat;
3611       PetscInt bs;
3612 
3613       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3614       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3615       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3616       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3617       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3618       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3619       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3620       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3621       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3622       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3623       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3624       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3625       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3626       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3627       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3628       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3629       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3630       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3631     }
3632     /* check */
3633     if (pcbddc->dbg_flag) {
3634       PetscReal error;
3635       Vec       x,x_change;
3636 
3637       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3638       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3639       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3640       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3641       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3642       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3643       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3644       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3645       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3646       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3647       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3648       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3649       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3650       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3651       ierr = VecDestroy(&x);CHKERRQ(ierr);
3652       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3653     }
3654 
3655     /* adapt sub_schurs computed (if any) */
3656     if (pcbddc->use_deluxe_scaling) {
3657       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3658       if (sub_schurs->S_Ej_all) {
3659         Mat                    S_new,tmat;
3660         ISLocalToGlobalMapping NtoSall;
3661         IS                     is_all_N,is_V,is_V_Sall;
3662         const PetscScalar      *array;
3663         const PetscInt         *idxs_V,*idxs_all;
3664         PetscInt               i,n_V;
3665 
3666         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3667         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3668         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3669         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3670         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3671         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3672         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3673         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3674         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3675         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3676         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3677         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3678         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3679         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3680         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3681         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3682         for (i=0;i<n_V;i++) {
3683           PetscScalar val;
3684           PetscInt    idx;
3685 
3686           idx = idxs_V[i];
3687           val = array[idxs_all[idxs_V[i]]];
3688           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3689         }
3690         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692         sub_schurs->S_Ej_all = S_new;
3693         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3694         if (sub_schurs->sum_S_Ej_all) {
3695           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3696           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3697           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3698           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3699           sub_schurs->sum_S_Ej_all = S_new;
3700           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3701         }
3702         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3703         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3704         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3705         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3706         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3707       }
3708     }
3709     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3710   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3711     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3712     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3713   }
3714 
3715   /* set up change of basis context */
3716   if (pcbddc->ChangeOfBasisMatrix) {
3717     PCBDDCChange_ctx change_ctx;
3718 
3719     if (!pcbddc->new_global_mat) {
3720       PetscInt global_size,local_size;
3721 
3722       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3723       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3724       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3725       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3726       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3727       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3728       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3729       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3730       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3731     } else {
3732       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3733       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3734       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3735     }
3736     if (!pcbddc->user_ChangeOfBasisMatrix) {
3737       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3738       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3739     } else {
3740       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3741       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3742     }
3743     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3744     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3745     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3746     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3747   }
3748 
3749   /* add pressure dofs to set of primal nodes for numbering purposes */
3750   for (i=0;i<pcbddc->benign_n;i++) {
3751     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
3752     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
3753     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3754     pcbddc->local_primal_size_cc++;
3755     pcbddc->local_primal_size++;
3756   }
3757 
3758   /* check if a new primal space has been introduced (also take into account benign trick) */
3759   pcbddc->new_primal_space_local = PETSC_TRUE;
3760   if (olocal_primal_size == pcbddc->local_primal_size) {
3761     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3762     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3763     if (!pcbddc->new_primal_space_local) {
3764       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3765       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3766     }
3767   }
3768   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3769   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3770   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3771 
3772   /* flush dbg viewer */
3773   if (pcbddc->dbg_flag) {
3774     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3775   }
3776 
3777   /* free workspace */
3778   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3779   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3780   if (!pcbddc->adaptive_selection) {
3781     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3782     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3783   } else {
3784     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3785                       pcbddc->adaptive_constraints_idxs_ptr,
3786                       pcbddc->adaptive_constraints_data_ptr,
3787                       pcbddc->adaptive_constraints_idxs,
3788                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3789     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3790     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3791   }
3792   PetscFunctionReturn(0);
3793 }
3794 
3795 #undef __FUNCT__
3796 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3797 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3798 {
3799   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3800   PC_IS       *pcis = (PC_IS*)pc->data;
3801   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3802   PetscInt    ierr,i,vertex_size,N;
3803   PetscViewer viewer=pcbddc->dbg_viewer;
3804 
3805   PetscFunctionBegin;
3806   /* Reset previously computed graph */
3807   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3808   /* Init local Graph struct */
3809   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3810   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3811 
3812   /* Check validity of the csr graph passed in by the user */
3813   if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3814     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
3815   }
3816 
3817   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3818   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3819     PetscInt  *xadj,*adjncy;
3820     PetscInt  nvtxs;
3821     PetscBool flg_row=PETSC_FALSE;
3822 
3823     if (pcbddc->use_local_adj) {
3824 
3825       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3826       if (flg_row) {
3827         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3828         pcbddc->computed_rowadj = PETSC_TRUE;
3829       }
3830       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3831     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3832       IS                     is_dummy;
3833       ISLocalToGlobalMapping l2gmap_dummy;
3834       PetscInt               j,sum;
3835       PetscInt               *cxadj,*cadjncy;
3836       const PetscInt         *idxs;
3837       PCBDDCGraph            graph;
3838       PetscBT                is_on_boundary;
3839 
3840       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3841       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3842       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3843       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3844       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3845       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3846       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3847       if (flg_row) {
3848         graph->xadj = xadj;
3849         graph->adjncy = adjncy;
3850       }
3851       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3852       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3853       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3854 
3855       if (pcbddc->dbg_flag) {
3856         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3857         for (i=0;i<graph->ncc;i++) {
3858           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3859         }
3860       }
3861 
3862       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3863       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3864       for (i=0;i<pcis->n_B;i++) {
3865         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3866       }
3867       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3868 
3869       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3870       sum = 0;
3871       for (i=0;i<graph->ncc;i++) {
3872         PetscInt sizecc = 0;
3873         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3874           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3875             sizecc++;
3876           }
3877         }
3878         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3879           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3880             cxadj[graph->queue[j]] = sizecc;
3881           }
3882         }
3883         sum += sizecc*sizecc;
3884       }
3885       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3886       sum = 0;
3887       for (i=0;i<pcis->n;i++) {
3888         PetscInt temp = cxadj[i];
3889         cxadj[i] = sum;
3890         sum += temp;
3891       }
3892       cxadj[pcis->n] = sum;
3893       for (i=0;i<graph->ncc;i++) {
3894         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3895           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3896             PetscInt k,sizecc = 0;
3897             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3898               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3899                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3900                 sizecc++;
3901               }
3902             }
3903           }
3904         }
3905       }
3906       if (sum) {
3907         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3908       } else {
3909         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3910         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3911       }
3912       graph->xadj = 0;
3913       graph->adjncy = 0;
3914       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3915       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3916     }
3917   }
3918   if (pcbddc->dbg_flag) {
3919     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3920   }
3921 
3922   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3923   vertex_size = 1;
3924   if (pcbddc->user_provided_isfordofs) {
3925     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3926       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3927       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3928         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3929         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3930       }
3931       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3932       pcbddc->n_ISForDofs = 0;
3933       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3934     }
3935     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3936     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3937   } else {
3938     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3939       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3940       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3941       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3942         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3943       }
3944     }
3945   }
3946 
3947   /* Setup of Graph */
3948   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3949     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3950   }
3951   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3952     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3953   }
3954   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { /* need to convert from global to local */
3955     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
3956   }
3957   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr);
3958 
3959   /* attach info on disconnected subdomains if present */
3960   if (pcbddc->n_local_subs) {
3961     PetscInt *local_subs;
3962 
3963     ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr);
3964     for (i=0;i<pcbddc->n_local_subs;i++) {
3965       const PetscInt *idxs;
3966       PetscInt       nl,j;
3967 
3968       ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr);
3969       ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3970       for (j=0;j<nl;j++) {
3971         local_subs[idxs[j]] = i;
3972       }
3973       ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3974     }
3975     pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
3976     pcbddc->mat_graph->local_subs = local_subs;
3977   }
3978 
3979   /* Graph's connected components analysis */
3980   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3981 
3982   /* print some info to stdout */
3983   if (pcbddc->dbg_flag) {
3984     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3985   }
3986 
3987   /* mark topography has done */
3988   pcbddc->recompute_topography = PETSC_FALSE;
3989   PetscFunctionReturn(0);
3990 }
3991 
3992 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3993 #undef __FUNCT__
3994 #define __FUNCT__ "PCBDDCSubsetNumbering"
3995 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3996 {
3997   PetscSF        sf;
3998   PetscLayout    map;
3999   const PetscInt *idxs;
4000   PetscInt       *leaf_data,*root_data,*gidxs;
4001   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
4002   PetscInt       n_n,nlocals,start,first_index;
4003   PetscMPIInt    commsize;
4004   PetscBool      first_found;
4005   PetscErrorCode ierr;
4006 
4007   PetscFunctionBegin;
4008   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
4009   if (subset_mult) {
4010     PetscCheckSameComm(subset,1,subset_mult,2);
4011     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
4012     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
4013   }
4014   /* create workspace layout for computing global indices of subset */
4015   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
4016   lbounds[0] = lbounds[1] = 0;
4017   for (i=0;i<n;i++) {
4018     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
4019     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
4020   }
4021   lbounds[0] = -lbounds[0];
4022   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4023   gbounds[0] = -gbounds[0];
4024   N = gbounds[1] - gbounds[0] + 1;
4025   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
4026   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
4027   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
4028   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
4029   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
4030 
4031   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
4032   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
4033   if (subset_mult) {
4034     const PetscInt* idxs_mult;
4035 
4036     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4037     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
4038     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4039   } else {
4040     for (i=0;i<n;i++) leaf_data[i] = 1;
4041   }
4042   /* local size of new subset */
4043   n_n = 0;
4044   for (i=0;i<n;i++) n_n += leaf_data[i];
4045 
4046   /* global indexes in layout */
4047   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
4048   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
4049   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
4050   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
4051   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
4052   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
4053 
4054   /* reduce from leaves to roots */
4055   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
4056   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4057   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4058 
4059   /* count indexes in local part of layout */
4060   nlocals = 0;
4061   first_index = -1;
4062   first_found = PETSC_FALSE;
4063   for (i=0;i<Nl;i++) {
4064     if (!first_found && root_data[i]) {
4065       first_found = PETSC_TRUE;
4066       first_index = i;
4067     }
4068     nlocals += root_data[i];
4069   }
4070 
4071   /* cumulative of number of indexes and size of subset without holes */
4072 #if defined(PETSC_HAVE_MPI_EXSCAN)
4073   start = 0;
4074   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4075 #else
4076   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4077   start = start-nlocals;
4078 #endif
4079 
4080   if (N_n) { /* compute total size of new subset if requested */
4081     *N_n = start + nlocals;
4082     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
4083     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4084   }
4085 
4086   /* adapt root data with cumulative */
4087   if (first_found) {
4088     PetscInt old_index;
4089 
4090     root_data[first_index] += start;
4091     old_index = first_index;
4092     for (i=first_index+1;i<Nl;i++) {
4093       if (root_data[i]) {
4094         root_data[i] += root_data[old_index];
4095         old_index = i;
4096       }
4097     }
4098   }
4099 
4100   /* from roots to leaves */
4101   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4102   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4103   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
4104 
4105   /* create new IS with global indexes without holes */
4106   if (subset_mult) {
4107     const PetscInt* idxs_mult;
4108     PetscInt        cum;
4109 
4110     cum = 0;
4111     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4112     for (i=0;i<n;i++) {
4113       PetscInt j;
4114       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
4115     }
4116     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4117   } else {
4118     for (i=0;i<n;i++) {
4119       gidxs[i] = leaf_data[i]-1;
4120     }
4121   }
4122   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
4123   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
4124   PetscFunctionReturn(0);
4125 }
4126 
4127 #undef __FUNCT__
4128 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
4129 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
4130 {
4131   PetscInt       i,j;
4132   PetscScalar    *alphas;
4133   PetscErrorCode ierr;
4134 
4135   PetscFunctionBegin;
4136   /* this implements stabilized Gram-Schmidt */
4137   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
4138   for (i=0;i<n;i++) {
4139     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
4140     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
4141     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
4142   }
4143   ierr = PetscFree(alphas);CHKERRQ(ierr);
4144   PetscFunctionReturn(0);
4145 }
4146 
4147 #undef __FUNCT__
4148 #define __FUNCT__ "MatISGetSubassemblingPattern"
4149 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
4150 {
4151   IS             ranks_send_to;
4152   PetscInt       n_neighs,*neighs,*n_shared,**shared;
4153   PetscMPIInt    size,rank,color;
4154   PetscInt       *xadj,*adjncy;
4155   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
4156   PetscInt       i,local_size,threshold=0;
4157   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
4158   PetscSubcomm   subcomm;
4159   PetscErrorCode ierr;
4160 
4161   PetscFunctionBegin;
4162   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
4163   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
4164   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
4165 
4166   /* Get info on mapping */
4167   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
4168   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4169 
4170   /* build local CSR graph of subdomains' connectivity */
4171   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
4172   xadj[0] = 0;
4173   xadj[1] = PetscMax(n_neighs-1,0);
4174   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
4175   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
4176 
4177   if (threshold) {
4178     PetscInt xadj_count = 0;
4179     for (i=1;i<n_neighs;i++) {
4180       if (n_shared[i] > threshold) {
4181         adjncy[xadj_count] = neighs[i];
4182         adjncy_wgt[xadj_count] = n_shared[i];
4183         xadj_count++;
4184       }
4185     }
4186     xadj[1] = xadj_count;
4187   } else {
4188     if (xadj[1]) {
4189       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
4190       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
4191     }
4192   }
4193   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4194   if (use_square) {
4195     for (i=0;i<xadj[1];i++) {
4196       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
4197     }
4198   }
4199   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4200 
4201   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
4202 
4203   /*
4204     Restrict work on active processes only.
4205   */
4206   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
4207   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
4208   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
4209   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
4210   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4211   if (color) {
4212     ierr = PetscFree(xadj);CHKERRQ(ierr);
4213     ierr = PetscFree(adjncy);CHKERRQ(ierr);
4214     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4215   } else {
4216     Mat             subdomain_adj;
4217     IS              new_ranks,new_ranks_contig;
4218     MatPartitioning partitioner;
4219     PetscInt        prank,rstart=0,rend=0;
4220     PetscInt        *is_indices,*oldranks;
4221     PetscBool       aggregate;
4222 
4223     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
4224     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
4225     prank = rank;
4226     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
4227     /*
4228     for (i=0;i<size;i++) {
4229       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
4230     }
4231     */
4232     for (i=0;i<xadj[1];i++) {
4233       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
4234     }
4235     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4236     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
4237     if (aggregate) {
4238       PetscInt    lrows,row,ncols,*cols;
4239       PetscMPIInt nrank;
4240       PetscScalar *vals;
4241 
4242       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
4243       lrows = 0;
4244       if (nrank<redprocs) {
4245         lrows = size/redprocs;
4246         if (nrank<size%redprocs) lrows++;
4247       }
4248       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
4249       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
4250       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4251       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4252       row = nrank;
4253       ncols = xadj[1]-xadj[0];
4254       cols = adjncy;
4255       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
4256       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
4257       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4258       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4259       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4260       ierr = PetscFree(xadj);CHKERRQ(ierr);
4261       ierr = PetscFree(adjncy);CHKERRQ(ierr);
4262       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4263       ierr = PetscFree(vals);CHKERRQ(ierr);
4264     } else {
4265       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
4266     }
4267     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
4268 
4269     /* Partition */
4270     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
4271     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
4272     if (use_vwgt) {
4273       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
4274       v_wgt[0] = local_size;
4275       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
4276     }
4277     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
4278     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
4279     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
4280     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
4281     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
4282 
4283     /* renumber new_ranks to avoid "holes" in new set of processors */
4284     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
4285     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
4286     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4287     if (!redprocs) {
4288       ranks_send_to_idx[0] = oldranks[is_indices[0]];
4289     } else {
4290       PetscInt    idxs[1];
4291       PetscMPIInt tag;
4292       MPI_Request *reqs;
4293 
4294       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
4295       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
4296       for (i=rstart;i<rend;i++) {
4297         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
4298       }
4299       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
4300       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4301       ierr = PetscFree(reqs);CHKERRQ(ierr);
4302       ranks_send_to_idx[0] = oldranks[idxs[0]];
4303     }
4304     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4305     /* clean up */
4306     ierr = PetscFree(oldranks);CHKERRQ(ierr);
4307     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
4308     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
4309     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
4310   }
4311   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4312 
4313   /* assemble parallel IS for sends */
4314   i = 1;
4315   if (color) i=0;
4316   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
4317   /* get back IS */
4318   *is_sends = ranks_send_to;
4319   PetscFunctionReturn(0);
4320 }
4321 
4322 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
4323 
4324 #undef __FUNCT__
4325 #define __FUNCT__ "MatISSubassemble"
4326 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
4327 {
4328   Mat                    local_mat;
4329   IS                     is_sends_internal;
4330   PetscInt               rows,cols,new_local_rows;
4331   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
4332   PetscBool              ismatis,isdense,newisdense,destroy_mat;
4333   ISLocalToGlobalMapping l2gmap;
4334   PetscInt*              l2gmap_indices;
4335   const PetscInt*        is_indices;
4336   MatType                new_local_type;
4337   /* buffers */
4338   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
4339   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
4340   PetscInt               *recv_buffer_idxs_local;
4341   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
4342   /* MPI */
4343   MPI_Comm               comm,comm_n;
4344   PetscSubcomm           subcomm;
4345   PetscMPIInt            n_sends,n_recvs,commsize;
4346   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
4347   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
4348   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
4349   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
4350   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
4351   PetscErrorCode         ierr;
4352 
4353   PetscFunctionBegin;
4354   /* TODO: add missing checks */
4355   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
4356   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
4357   PetscValidLogicalCollectiveEnum(mat,reuse,5);
4358   PetscValidLogicalCollectiveInt(mat,nis,7);
4359   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
4360   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
4361   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4362   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
4363   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
4364   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
4365   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
4366   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
4367     PetscInt mrows,mcols,mnrows,mncols;
4368     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
4369     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
4370     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
4371     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
4372     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
4373     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
4374   }
4375   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
4376   PetscValidLogicalCollectiveInt(mat,bs,0);
4377   /* prepare IS for sending if not provided */
4378   if (!is_sends) {
4379     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
4380     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
4381   } else {
4382     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
4383     is_sends_internal = is_sends;
4384   }
4385 
4386   /* get comm */
4387   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
4388 
4389   /* compute number of sends */
4390   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
4391   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
4392 
4393   /* compute number of receives */
4394   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
4395   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
4396   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
4397   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4398   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
4399   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
4400   ierr = PetscFree(iflags);CHKERRQ(ierr);
4401 
4402   /* restrict comm if requested */
4403   subcomm = 0;
4404   destroy_mat = PETSC_FALSE;
4405   if (restrict_comm) {
4406     PetscMPIInt color,subcommsize;
4407 
4408     color = 0;
4409     if (restrict_full) {
4410       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
4411     } else {
4412       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
4413     }
4414     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
4415     subcommsize = commsize - subcommsize;
4416     /* check if reuse has been requested */
4417     if (reuse == MAT_REUSE_MATRIX) {
4418       if (*mat_n) {
4419         PetscMPIInt subcommsize2;
4420         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
4421         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
4422         comm_n = PetscObjectComm((PetscObject)*mat_n);
4423       } else {
4424         comm_n = PETSC_COMM_SELF;
4425       }
4426     } else { /* MAT_INITIAL_MATRIX */
4427       PetscMPIInt rank;
4428 
4429       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4430       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4431       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4432       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4433       comm_n = PetscSubcommChild(subcomm);
4434     }
4435     /* flag to destroy *mat_n if not significative */
4436     if (color) destroy_mat = PETSC_TRUE;
4437   } else {
4438     comm_n = comm;
4439   }
4440 
4441   /* prepare send/receive buffers */
4442   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4443   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4444   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4445   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4446   if (nis) {
4447     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4448   }
4449 
4450   /* Get data from local matrices */
4451   if (!isdense) {
4452     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4453     /* TODO: See below some guidelines on how to prepare the local buffers */
4454     /*
4455        send_buffer_vals should contain the raw values of the local matrix
4456        send_buffer_idxs should contain:
4457        - MatType_PRIVATE type
4458        - PetscInt        size_of_l2gmap
4459        - PetscInt        global_row_indices[size_of_l2gmap]
4460        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4461     */
4462   } else {
4463     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4464     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4465     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4466     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4467     send_buffer_idxs[1] = i;
4468     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4469     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4470     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4471     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4472     for (i=0;i<n_sends;i++) {
4473       ilengths_vals[is_indices[i]] = len*len;
4474       ilengths_idxs[is_indices[i]] = len+2;
4475     }
4476   }
4477   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4478   /* additional is (if any) */
4479   if (nis) {
4480     PetscMPIInt psum;
4481     PetscInt j;
4482     for (j=0,psum=0;j<nis;j++) {
4483       PetscInt plen;
4484       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4485       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4486       psum += len+1; /* indices + lenght */
4487     }
4488     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4489     for (j=0,psum=0;j<nis;j++) {
4490       PetscInt plen;
4491       const PetscInt *is_array_idxs;
4492       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4493       send_buffer_idxs_is[psum] = plen;
4494       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4495       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4496       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4497       psum += plen+1; /* indices + lenght */
4498     }
4499     for (i=0;i<n_sends;i++) {
4500       ilengths_idxs_is[is_indices[i]] = psum;
4501     }
4502     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4503   }
4504 
4505   buf_size_idxs = 0;
4506   buf_size_vals = 0;
4507   buf_size_idxs_is = 0;
4508   for (i=0;i<n_recvs;i++) {
4509     buf_size_idxs += (PetscInt)olengths_idxs[i];
4510     buf_size_vals += (PetscInt)olengths_vals[i];
4511     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4512   }
4513   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4514   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4515   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4516 
4517   /* get new tags for clean communications */
4518   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4519   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4520   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4521 
4522   /* allocate for requests */
4523   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4524   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4525   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4526   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4527   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4528   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4529 
4530   /* communications */
4531   ptr_idxs = recv_buffer_idxs;
4532   ptr_vals = recv_buffer_vals;
4533   ptr_idxs_is = recv_buffer_idxs_is;
4534   for (i=0;i<n_recvs;i++) {
4535     source_dest = onodes[i];
4536     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4537     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4538     ptr_idxs += olengths_idxs[i];
4539     ptr_vals += olengths_vals[i];
4540     if (nis) {
4541       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4542       ptr_idxs_is += olengths_idxs_is[i];
4543     }
4544   }
4545   for (i=0;i<n_sends;i++) {
4546     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4547     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4548     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4549     if (nis) {
4550       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4551     }
4552   }
4553   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4554   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4555 
4556   /* assemble new l2g map */
4557   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4558   ptr_idxs = recv_buffer_idxs;
4559   new_local_rows = 0;
4560   for (i=0;i<n_recvs;i++) {
4561     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4562     ptr_idxs += olengths_idxs[i];
4563   }
4564   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4565   ptr_idxs = recv_buffer_idxs;
4566   new_local_rows = 0;
4567   for (i=0;i<n_recvs;i++) {
4568     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4569     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4570     ptr_idxs += olengths_idxs[i];
4571   }
4572   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4573   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4574   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4575 
4576   /* infer new local matrix type from received local matrices type */
4577   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4578   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4579   if (n_recvs) {
4580     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4581     ptr_idxs = recv_buffer_idxs;
4582     for (i=0;i<n_recvs;i++) {
4583       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4584         new_local_type_private = MATAIJ_PRIVATE;
4585         break;
4586       }
4587       ptr_idxs += olengths_idxs[i];
4588     }
4589     switch (new_local_type_private) {
4590       case MATDENSE_PRIVATE:
4591         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4592           new_local_type = MATSEQAIJ;
4593           bs = 1;
4594         } else { /* if I receive only 1 dense matrix */
4595           new_local_type = MATSEQDENSE;
4596           bs = 1;
4597         }
4598         break;
4599       case MATAIJ_PRIVATE:
4600         new_local_type = MATSEQAIJ;
4601         bs = 1;
4602         break;
4603       case MATBAIJ_PRIVATE:
4604         new_local_type = MATSEQBAIJ;
4605         break;
4606       case MATSBAIJ_PRIVATE:
4607         new_local_type = MATSEQSBAIJ;
4608         break;
4609       default:
4610         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4611         break;
4612     }
4613   } else { /* by default, new_local_type is seqdense */
4614     new_local_type = MATSEQDENSE;
4615     bs = 1;
4616   }
4617 
4618   /* create MATIS object if needed */
4619   if (reuse == MAT_INITIAL_MATRIX) {
4620     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4621     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4622   } else {
4623     /* it also destroys the local matrices */
4624     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4625   }
4626   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4627   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4628 
4629   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4630 
4631   /* Global to local map of received indices */
4632   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4633   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4634   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4635 
4636   /* restore attributes -> type of incoming data and its size */
4637   buf_size_idxs = 0;
4638   for (i=0;i<n_recvs;i++) {
4639     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4640     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4641     buf_size_idxs += (PetscInt)olengths_idxs[i];
4642   }
4643   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4644 
4645   /* set preallocation */
4646   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4647   if (!newisdense) {
4648     PetscInt *new_local_nnz=0;
4649 
4650     ptr_vals = recv_buffer_vals;
4651     ptr_idxs = recv_buffer_idxs_local;
4652     if (n_recvs) {
4653       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4654     }
4655     for (i=0;i<n_recvs;i++) {
4656       PetscInt j;
4657       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4658         for (j=0;j<*(ptr_idxs+1);j++) {
4659           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4660         }
4661       } else {
4662         /* TODO */
4663       }
4664       ptr_idxs += olengths_idxs[i];
4665     }
4666     if (new_local_nnz) {
4667       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4668       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4669       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4670       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4671       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4672       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4673     } else {
4674       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4675     }
4676     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4677   } else {
4678     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4679   }
4680 
4681   /* set values */
4682   ptr_vals = recv_buffer_vals;
4683   ptr_idxs = recv_buffer_idxs_local;
4684   for (i=0;i<n_recvs;i++) {
4685     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4686       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4687       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4688       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4689       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4690       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4691     } else {
4692       /* TODO */
4693     }
4694     ptr_idxs += olengths_idxs[i];
4695     ptr_vals += olengths_vals[i];
4696   }
4697   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4698   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4699   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4700   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4701   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4702   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4703 
4704 #if 0
4705   if (!restrict_comm) { /* check */
4706     Vec       lvec,rvec;
4707     PetscReal infty_error;
4708 
4709     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4710     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4711     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4712     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4713     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4714     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4715     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4716     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4717     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4718   }
4719 #endif
4720 
4721   /* assemble new additional is (if any) */
4722   if (nis) {
4723     PetscInt **temp_idxs,*count_is,j,psum;
4724 
4725     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4726     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4727     ptr_idxs = recv_buffer_idxs_is;
4728     psum = 0;
4729     for (i=0;i<n_recvs;i++) {
4730       for (j=0;j<nis;j++) {
4731         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4732         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4733         psum += plen;
4734         ptr_idxs += plen+1; /* shift pointer to received data */
4735       }
4736     }
4737     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4738     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4739     for (i=1;i<nis;i++) {
4740       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4741     }
4742     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4743     ptr_idxs = recv_buffer_idxs_is;
4744     for (i=0;i<n_recvs;i++) {
4745       for (j=0;j<nis;j++) {
4746         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4747         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4748         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4749         ptr_idxs += plen+1; /* shift pointer to received data */
4750       }
4751     }
4752     for (i=0;i<nis;i++) {
4753       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4754       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4755       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4756     }
4757     ierr = PetscFree(count_is);CHKERRQ(ierr);
4758     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4759     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4760   }
4761   /* free workspace */
4762   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4763   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4764   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4765   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4766   if (isdense) {
4767     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4768     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4769   } else {
4770     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4771   }
4772   if (nis) {
4773     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4774     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4775   }
4776   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4777   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4778   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4779   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4780   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4781   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4782   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4783   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4784   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4785   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4786   ierr = PetscFree(onodes);CHKERRQ(ierr);
4787   if (nis) {
4788     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4789     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4790     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4791   }
4792   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4793   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4794     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4795     for (i=0;i<nis;i++) {
4796       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4797     }
4798     *mat_n = NULL;
4799   }
4800   PetscFunctionReturn(0);
4801 }
4802 
4803 /* temporary hack into ksp private data structure */
4804 #include <petsc/private/kspimpl.h>
4805 
4806 #undef __FUNCT__
4807 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4808 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4809 {
4810   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4811   PC_IS                  *pcis = (PC_IS*)pc->data;
4812   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4813   MatNullSpace           CoarseNullSpace=NULL;
4814   ISLocalToGlobalMapping coarse_islg;
4815   IS                     coarse_is,*isarray;
4816   PetscInt               i,im_active=-1,active_procs=-1;
4817   PetscInt               nis,nisdofs,nisneu,nisvert;
4818   PC                     pc_temp;
4819   PCType                 coarse_pc_type;
4820   KSPType                coarse_ksp_type;
4821   PetscBool              multilevel_requested,multilevel_allowed;
4822   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4823   Mat                    t_coarse_mat_is;
4824   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4825   PetscMPIInt            all_procs;
4826   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4827   PetscBool              compute_vecs = PETSC_FALSE;
4828   PetscScalar            *array;
4829   PetscErrorCode         ierr;
4830 
4831   PetscFunctionBegin;
4832   /* Assign global numbering to coarse dofs */
4833   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4834     PetscInt ocoarse_size;
4835     compute_vecs = PETSC_TRUE;
4836     ocoarse_size = pcbddc->coarse_size;
4837     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4838     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4839     /* see if we can avoid some work */
4840     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4841       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4842       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4843         PC        pc;
4844         PetscBool isbddc;
4845 
4846         /* temporary workaround since PCBDDC does not have a reset method so far */
4847         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4848         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4849         if (isbddc) {
4850           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4851         }
4852         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4853         coarse_reuse = PETSC_FALSE;
4854       } else { /* we can safely reuse already computed coarse matrix */
4855         coarse_reuse = PETSC_TRUE;
4856       }
4857     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4858       coarse_reuse = PETSC_FALSE;
4859     }
4860     /* reset any subassembling information */
4861     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4862     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4863   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4864     coarse_reuse = PETSC_TRUE;
4865   }
4866 
4867   /* count "active" (i.e. with positive local size) and "void" processes */
4868   im_active = !!(pcis->n);
4869   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4870   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4871   void_procs = all_procs-active_procs;
4872   csin_type_simple = PETSC_TRUE;
4873   redist = PETSC_FALSE;
4874   if (pcbddc->current_level && void_procs) {
4875     csin_ml = PETSC_TRUE;
4876     ncoarse_ml = void_procs;
4877     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4878     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4879       csin_ds = PETSC_TRUE;
4880       ncoarse_ds = pcbddc->redistribute_coarse;
4881       redist = PETSC_TRUE;
4882     } else {
4883       csin_ds = PETSC_TRUE;
4884       ncoarse_ds = active_procs;
4885       redist = PETSC_TRUE;
4886     }
4887   } else {
4888     csin_ml = PETSC_FALSE;
4889     ncoarse_ml = all_procs;
4890     if (void_procs) {
4891       csin_ds = PETSC_TRUE;
4892       ncoarse_ds = void_procs;
4893       csin_type_simple = PETSC_FALSE;
4894     } else {
4895       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4896         csin_ds = PETSC_TRUE;
4897         ncoarse_ds = pcbddc->redistribute_coarse;
4898         redist = PETSC_TRUE;
4899       } else {
4900         csin_ds = PETSC_FALSE;
4901         ncoarse_ds = all_procs;
4902       }
4903     }
4904   }
4905 
4906   /*
4907     test if we can go multilevel: three conditions must be satisfied:
4908     - we have not exceeded the number of levels requested
4909     - we can actually subassemble the active processes
4910     - we can find a suitable number of MPI processes where we can place the subassembled problem
4911   */
4912   multilevel_allowed = PETSC_FALSE;
4913   multilevel_requested = PETSC_FALSE;
4914   if (pcbddc->current_level < pcbddc->max_levels) {
4915     multilevel_requested = PETSC_TRUE;
4916     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4917       multilevel_allowed = PETSC_FALSE;
4918     } else {
4919       multilevel_allowed = PETSC_TRUE;
4920     }
4921   }
4922   /* determine number of process partecipating to coarse solver */
4923   if (multilevel_allowed) {
4924     ncoarse = ncoarse_ml;
4925     csin = csin_ml;
4926     redist = PETSC_FALSE;
4927   } else {
4928     ncoarse = ncoarse_ds;
4929     csin = csin_ds;
4930   }
4931 
4932   /* creates temporary l2gmap and IS for coarse indexes */
4933   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4934   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4935 
4936   /* creates temporary MATIS object for coarse matrix */
4937   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4938   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4939   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4940   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4941   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4942   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4943   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4944   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4945   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4946 
4947   /* compute dofs splitting and neumann boundaries for coarse dofs */
4948   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local))) { /* protects from unneded computations */
4949     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4950     const PetscInt         *idxs;
4951     ISLocalToGlobalMapping tmap;
4952 
4953     /* create map between primal indices (in local representative ordering) and local primal numbering */
4954     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4955     /* allocate space for temporary storage */
4956     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4957     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4958     /* allocate for IS array */
4959     nisdofs = pcbddc->n_ISForDofsLocal;
4960     nisneu = !!pcbddc->NeumannBoundariesLocal;
4961     nisvert = 0;
4962     if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) {
4963       nisvert = 1;
4964     }
4965     nis = nisdofs + nisneu + nisvert;
4966     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4967     /* dofs splitting */
4968     for (i=0;i<nisdofs;i++) {
4969       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4970       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4971       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4972       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4973       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4974       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4975       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4976       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4977     }
4978     /* neumann boundaries */
4979     if (pcbddc->NeumannBoundariesLocal) {
4980       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4981       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4982       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4983       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4984       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4985       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4986       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4987       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4988     }
4989     /* primal vertices (benign) */
4990     if (pcbddc->benign_saddle_point && pcbddc->user_primal_vertices_local) {
4991       ierr = ISGetLocalSize(pcbddc->user_primal_vertices_local,&tsize);CHKERRQ(ierr);
4992       ierr = ISGetIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr);
4993       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4994       ierr = ISRestoreIndices(pcbddc->user_primal_vertices_local,&idxs);CHKERRQ(ierr);
4995       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4996       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nis-1]);CHKERRQ(ierr);
4997     }
4998     /* free memory */
4999     ierr = PetscFree(tidxs);CHKERRQ(ierr);
5000     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
5001     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
5002   } else {
5003     nis = 0;
5004     nisdofs = 0;
5005     nisneu = 0;
5006     nisvert = 0;
5007     isarray = NULL;
5008   }
5009   /* destroy no longer needed map */
5010   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
5011 
5012   /* restrict on coarse candidates (if needed) */
5013   coarse_mat_is = NULL;
5014   if (csin) {
5015     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
5016       if (redist) {
5017         PetscMPIInt rank;
5018         PetscInt    spc,n_spc_p1,dest[1],destsize;
5019 
5020         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
5021         spc = active_procs/ncoarse;
5022         n_spc_p1 = active_procs%ncoarse;
5023         if (im_active) {
5024           destsize = 1;
5025           if (rank > n_spc_p1*(spc+1)-1) {
5026             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
5027           } else {
5028             dest[0] = rank/(spc+1);
5029           }
5030         } else {
5031           destsize = 0;
5032         }
5033         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5034       } else if (csin_type_simple) {
5035         PetscMPIInt rank;
5036         PetscInt    issize,isidx;
5037 
5038         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
5039         if (im_active) {
5040           issize = 1;
5041           isidx = (PetscInt)rank;
5042         } else {
5043           issize = 0;
5044           isidx = -1;
5045         }
5046         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5047       } else { /* get a suitable subassembling pattern from MATIS code */
5048         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
5049       }
5050 
5051       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
5052       if (!redist || ncoarse <= void_procs) {
5053         PetscInt ncoarse_cand,tissize,*nisindices;
5054         PetscInt *coarse_candidates;
5055         const PetscInt* tisindices;
5056 
5057         /* get coarse candidates' ranks in pc communicator */
5058         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
5059         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5060         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
5061           if (!coarse_candidates[i]) {
5062             coarse_candidates[ncoarse_cand++]=i;
5063           }
5064         }
5065         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
5066 
5067 
5068         if (pcbddc->dbg_flag) {
5069           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5070           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
5071           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5072           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
5073           for (i=0;i<ncoarse_cand;i++) {
5074             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
5075           }
5076           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
5077           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5078         }
5079         /* shift the pattern on coarse candidates */
5080         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
5081         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5082         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
5083         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
5084         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5085         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
5086         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
5087       }
5088       if (pcbddc->dbg_flag) {
5089         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5090         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
5091         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5092         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5093       }
5094     }
5095     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
5096     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
5097       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5098     } else { /* this is the last level, so use just receiving processes in subcomm */
5099       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5100     }
5101   } else {
5102     if (pcbddc->dbg_flag) {
5103       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5104       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
5105       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5106     }
5107     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
5108     coarse_mat_is = t_coarse_mat_is;
5109   }
5110 
5111   /* create local to global scatters for coarse problem */
5112   if (compute_vecs) {
5113     PetscInt lrows;
5114     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
5115     if (coarse_mat_is) {
5116       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
5117     } else {
5118       lrows = 0;
5119     }
5120     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
5121     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
5122     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
5123     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5124     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5125   }
5126   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
5127   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
5128 
5129   /* set defaults for coarse KSP and PC */
5130   if (multilevel_allowed) {
5131     coarse_ksp_type = KSPRICHARDSON;
5132     coarse_pc_type = PCBDDC;
5133   } else {
5134     coarse_ksp_type = KSPPREONLY;
5135     coarse_pc_type = PCREDUNDANT;
5136   }
5137 
5138   /* print some info if requested */
5139   if (pcbddc->dbg_flag) {
5140     if (!multilevel_allowed) {
5141       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5142       if (multilevel_requested) {
5143         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5144       } else if (pcbddc->max_levels) {
5145         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
5146       }
5147       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5148     }
5149   }
5150 
5151   /* create the coarse KSP object only once with defaults */
5152   if (coarse_mat_is) {
5153     MatReuse coarse_mat_reuse;
5154     PetscViewer dbg_viewer = NULL;
5155     if (pcbddc->dbg_flag) {
5156       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
5157       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5158     }
5159     if (!pcbddc->coarse_ksp) {
5160       char prefix[256],str_level[16];
5161       size_t len;
5162       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
5163       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5164       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
5165       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
5166       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
5167       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
5168       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
5169       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5170       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5171       /* prefix */
5172       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
5173       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
5174       if (!pcbddc->current_level) {
5175         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
5176         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
5177       } else {
5178         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
5179         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5180         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5181         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
5182         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
5183         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
5184       }
5185       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
5186       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5187       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
5188       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5189       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
5190       /* allow user customization */
5191       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
5192     }
5193     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5194     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5195     if (nisdofs) {
5196       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
5197       for (i=0;i<nisdofs;i++) {
5198         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
5199       }
5200     }
5201     if (nisneu) {
5202       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
5203       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
5204     }
5205     if (nisvert) {
5206       ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr);
5207       ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr);
5208     }
5209 
5210     /* get some info after set from options */
5211     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
5212     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
5213     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
5214     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
5215       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5216       isbddc = PETSC_FALSE;
5217     }
5218     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
5219     if (isredundant) {
5220       KSP inner_ksp;
5221       PC  inner_pc;
5222       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
5223       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
5224       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
5225     }
5226 
5227     /* assemble coarse matrix */
5228     if (coarse_reuse) {
5229       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
5230       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
5231       coarse_mat_reuse = MAT_REUSE_MATRIX;
5232     } else {
5233       coarse_mat_reuse = MAT_INITIAL_MATRIX;
5234     }
5235     if (isbddc || isnn) {
5236       if (pcbddc->coarsening_ratio > 1) {
5237         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
5238           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
5239           if (pcbddc->dbg_flag) {
5240             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5241             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
5242             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
5243             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5244           }
5245         }
5246         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
5247       } else {
5248         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
5249         coarse_mat = coarse_mat_is;
5250       }
5251     } else {
5252       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
5253     }
5254     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
5255 
5256     /* propagate symmetry info of coarse matrix */
5257     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
5258     if (pc->pmat->symmetric_set) {
5259       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
5260     }
5261     if (pc->pmat->hermitian_set) {
5262       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
5263     }
5264     if (pc->pmat->spd_set) {
5265       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
5266     }
5267     /* set operators */
5268     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5269     if (pcbddc->dbg_flag) {
5270       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5271     }
5272   } else { /* processes non partecipating to coarse solver (if any) */
5273     coarse_mat = 0;
5274   }
5275   ierr = PetscFree(isarray);CHKERRQ(ierr);
5276 #if 0
5277   {
5278     PetscViewer viewer;
5279     char filename[256];
5280     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
5281     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
5282     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
5283     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
5284     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
5285   }
5286 #endif
5287 
5288   /* Compute coarse null space (special handling by BDDC only) */
5289 #if 0
5290   if (pcbddc->NullSpace) {
5291     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
5292   }
5293 #endif
5294   /* hack */
5295   if (pcbddc->coarse_ksp) {
5296     Vec crhs,csol;
5297 
5298     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
5299     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
5300     if (!csol) {
5301       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
5302     }
5303     if (!crhs) {
5304       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
5305     }
5306   }
5307 
5308   /* compute null space for coarse solver if the benign trick has been requested */
5309   if (pcbddc->benign_null) {
5310 
5311     ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr);
5312     for (i=0;i<pcbddc->benign_n;i++) {
5313       ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr);
5314     }
5315     ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
5316     ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
5317     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5318     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5319     if (coarse_mat) {
5320       Vec         nullv;
5321       PetscScalar *array,*array2;
5322       PetscInt    nl;
5323 
5324       ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr);
5325       ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr);
5326       ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5327       ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr);
5328       ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr);
5329       ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr);
5330       ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5331       ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr);
5332       ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr);
5333       ierr = VecDestroy(&nullv);CHKERRQ(ierr);
5334     }
5335   }
5336 
5337   if (pcbddc->coarse_ksp) {
5338     PetscBool ispreonly;
5339 
5340     if (CoarseNullSpace) {
5341       PetscBool isnull;
5342       ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr);
5343       if (isnull) {
5344         if (isbddc && !pcbddc->benign_saddle_point) {
5345           ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
5346         } else {
5347           ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
5348         }
5349       } else {
5350         ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5351       }
5352     }
5353     /* setup coarse ksp */
5354     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
5355     /* Check coarse problem if in debug mode or if solving with an iterative method */
5356     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
5357     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
5358       KSP       check_ksp;
5359       KSPType   check_ksp_type;
5360       PC        check_pc;
5361       Vec       check_vec,coarse_vec;
5362       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
5363       PetscInt  its;
5364       PetscBool compute_eigs;
5365       PetscReal *eigs_r,*eigs_c;
5366       PetscInt  neigs;
5367       const char *prefix;
5368 
5369       /* Create ksp object suitable for estimation of extreme eigenvalues */
5370       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
5371       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5372       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5373       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
5374       if (ispreonly) {
5375         check_ksp_type = KSPPREONLY;
5376         compute_eigs = PETSC_FALSE;
5377       } else {
5378         check_ksp_type = KSPGMRES;
5379         compute_eigs = PETSC_TRUE;
5380       }
5381       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
5382       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
5383       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
5384       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
5385       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
5386       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
5387       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
5388       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
5389       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
5390       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
5391       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
5392       /* create random vec */
5393       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
5394       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
5395       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
5396       if (CoarseNullSpace) {
5397         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
5398       }
5399       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5400       /* solve coarse problem */
5401       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
5402       if (CoarseNullSpace) {
5403         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
5404       }
5405       /* set eigenvalue estimation if preonly has not been requested */
5406       if (compute_eigs) {
5407         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
5408         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
5409         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
5410         lambda_max = eigs_r[neigs-1];
5411         lambda_min = eigs_r[0];
5412         if (pcbddc->use_coarse_estimates) {
5413           if (lambda_max>lambda_min) {
5414             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
5415             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
5416           }
5417         }
5418       }
5419 
5420       /* check coarse problem residual error */
5421       if (pcbddc->dbg_flag) {
5422         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
5423         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5424         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
5425         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
5426         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5427         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
5428         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
5429         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
5430         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
5431         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
5432         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
5433         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
5434         if (CoarseNullSpace) {
5435           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr);
5436         }
5437         if (compute_eigs) {
5438           PetscReal lambda_max_s,lambda_min_s;
5439           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
5440           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
5441           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
5442           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
5443           for (i=0;i<neigs;i++) {
5444             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
5445           }
5446         }
5447         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5448         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5449       }
5450       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
5451       if (compute_eigs) {
5452         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
5453         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
5454       }
5455     }
5456   }
5457   /* print additional info */
5458   if (pcbddc->dbg_flag) {
5459     /* waits until all processes reaches this point */
5460     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
5461     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
5462     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5463   }
5464 
5465   /* free memory */
5466   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5467   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
5468   PetscFunctionReturn(0);
5469 }
5470 
5471 #undef __FUNCT__
5472 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
5473 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
5474 {
5475   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5476   PC_IS*         pcis = (PC_IS*)pc->data;
5477   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5478   IS             subset,subset_mult,subset_n;
5479   PetscInt       local_size,coarse_size=0;
5480   PetscInt       *local_primal_indices=NULL;
5481   const PetscInt *t_local_primal_indices;
5482   PetscErrorCode ierr;
5483 
5484   PetscFunctionBegin;
5485   /* Compute global number of coarse dofs */
5486   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5487     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5488   }
5489   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5490   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5491   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5492   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5493   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5494   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5495   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5496   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5497   if (local_size != pcbddc->local_primal_size) {
5498     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5499   }
5500   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5501   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5502   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5503   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5504   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5505 
5506   /* check numbering */
5507   if (pcbddc->dbg_flag) {
5508     PetscScalar coarsesum,*array,*array2;
5509     PetscInt    i;
5510     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5511 
5512     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5513     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5514     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5515     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
5516     /* counter */
5517     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5518     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5519     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5520     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5521     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5522     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5523 
5524     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5525     for (i=0;i<pcbddc->local_primal_size;i++) {
5526       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5527     }
5528     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5529     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5530     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5531     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5532     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5533     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5534     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5535     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5536     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5537     for (i=0;i<pcis->n;i++) {
5538       if (array[i] != 0.0 && array[i] != array2[i]) {
5539         PetscInt owned = (PetscInt)PetscRealPart(array[i]);
5540         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
5541         set_error = PETSC_TRUE;
5542         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5543       }
5544     }
5545     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5546     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5547     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5548     for (i=0;i<pcis->n;i++) {
5549       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5550     }
5551     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5552     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5553     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5554     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5555     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5556     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5557     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5558       PetscInt *gidxs;
5559 
5560       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5561       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5562       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5563       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5564       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5565       for (i=0;i<pcbddc->local_primal_size;i++) {
5566         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5567       }
5568       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5569       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5570     }
5571     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5572     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5573   }
5574   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5575   /* get back data */
5576   *coarse_size_n = coarse_size;
5577   *local_primal_indices_n = local_primal_indices;
5578   PetscFunctionReturn(0);
5579 }
5580 
5581 #undef __FUNCT__
5582 #define __FUNCT__ "PCBDDCGlobalToLocal"
5583 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5584 {
5585   IS             localis_t;
5586   PetscInt       i,lsize,*idxs,n;
5587   PetscScalar    *vals;
5588   PetscErrorCode ierr;
5589 
5590   PetscFunctionBegin;
5591   /* get indices in local ordering exploiting local to global map */
5592   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5593   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5594   for (i=0;i<lsize;i++) vals[i] = 1.0;
5595   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5596   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5597   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5598   if (idxs) { /* multilevel guard */
5599     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5600   }
5601   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5602   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5603   ierr = PetscFree(vals);CHKERRQ(ierr);
5604   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5605   /* now compute set in local ordering */
5606   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5607   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5608   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5609   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5610   for (i=0,lsize=0;i<n;i++) {
5611     if (PetscRealPart(vals[i]) > 0.5) {
5612       lsize++;
5613     }
5614   }
5615   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5616   for (i=0,lsize=0;i<n;i++) {
5617     if (PetscRealPart(vals[i]) > 0.5) {
5618       idxs[lsize++] = i;
5619     }
5620   }
5621   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5622   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5623   *localis = localis_t;
5624   PetscFunctionReturn(0);
5625 }
5626 
5627 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5628 #undef __FUNCT__
5629 #define __FUNCT__ "PCBDDCMatMult_Private"
5630 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5631 {
5632   PCBDDCChange_ctx change_ctx;
5633   PetscErrorCode   ierr;
5634 
5635   PetscFunctionBegin;
5636   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5637   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5638   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5639   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5640   PetscFunctionReturn(0);
5641 }
5642 
5643 #undef __FUNCT__
5644 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5645 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5646 {
5647   PCBDDCChange_ctx change_ctx;
5648   PetscErrorCode   ierr;
5649 
5650   PetscFunctionBegin;
5651   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5652   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5653   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5654   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5655   PetscFunctionReturn(0);
5656 }
5657 
5658 #undef __FUNCT__
5659 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5660 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5661 {
5662   PC_IS               *pcis=(PC_IS*)pc->data;
5663   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5664   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5665   Mat                 S_j;
5666   PetscInt            *used_xadj,*used_adjncy;
5667   PetscBool           free_used_adj;
5668   PetscErrorCode      ierr;
5669 
5670   PetscFunctionBegin;
5671   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5672   free_used_adj = PETSC_FALSE;
5673   if (pcbddc->sub_schurs_layers == -1) {
5674     used_xadj = NULL;
5675     used_adjncy = NULL;
5676   } else {
5677     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5678       used_xadj = pcbddc->mat_graph->xadj;
5679       used_adjncy = pcbddc->mat_graph->adjncy;
5680     } else if (pcbddc->computed_rowadj) {
5681       used_xadj = pcbddc->mat_graph->xadj;
5682       used_adjncy = pcbddc->mat_graph->adjncy;
5683     } else {
5684       PetscBool      flg_row=PETSC_FALSE;
5685       const PetscInt *xadj,*adjncy;
5686       PetscInt       nvtxs;
5687 
5688       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5689       if (flg_row) {
5690         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5691         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5692         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5693         free_used_adj = PETSC_TRUE;
5694       } else {
5695         pcbddc->sub_schurs_layers = -1;
5696         used_xadj = NULL;
5697         used_adjncy = NULL;
5698       }
5699       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5700     }
5701   }
5702 
5703   /* setup sub_schurs data */
5704   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5705   if (!sub_schurs->use_mumps) {
5706     /* pcbddc->ksp_D up to date only if not using MUMPS */
5707     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5708     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5709   } else {
5710     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5711     PetscBool isseqaij;
5712     if (!pcbddc->use_vertices && reuse_solvers) {
5713       PetscInt n_vertices;
5714 
5715       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5716       reuse_solvers = (PetscBool)!n_vertices;
5717     }
5718     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5719     if (!isseqaij) {
5720       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5721       if (matis->A == pcbddc->local_mat) {
5722         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5723         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5724       } else {
5725         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5726       }
5727     }
5728     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5729   }
5730   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5731 
5732   /* free adjacency */
5733   if (free_used_adj) {
5734     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5735   }
5736   PetscFunctionReturn(0);
5737 }
5738 
5739 #undef __FUNCT__
5740 #define __FUNCT__ "PCBDDCInitSubSchurs"
5741 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5742 {
5743   PC_IS               *pcis=(PC_IS*)pc->data;
5744   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5745   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5746   PCBDDCGraph         graph;
5747   PetscErrorCode      ierr;
5748 
5749   PetscFunctionBegin;
5750   /* attach interface graph for determining subsets */
5751   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5752     IS       verticesIS,verticescomm;
5753     PetscInt vsize,*idxs;
5754 
5755     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5756     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5757     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5758     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5759     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5760     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5761     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5762     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5763     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5764     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5765     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5766 /*
5767     if (pcbddc->dbg_flag) {
5768       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5769     }
5770 */
5771   } else {
5772     graph = pcbddc->mat_graph;
5773   }
5774 
5775   /* sub_schurs init */
5776   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5777 
5778   /* free graph struct */
5779   if (pcbddc->sub_schurs_rebuild) {
5780     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5781   }
5782   PetscFunctionReturn(0);
5783 }
5784 
5785 #undef __FUNCT__
5786 #define __FUNCT__ "PCBDDCCheckOperator"
5787 PetscErrorCode PCBDDCCheckOperator(PC pc)
5788 {
5789   PC_IS               *pcis=(PC_IS*)pc->data;
5790   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5791   PetscErrorCode      ierr;
5792 
5793   PetscFunctionBegin;
5794   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5795     IS             zerodiag = NULL;
5796     Mat            S_j,B0_B=NULL;
5797     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5798     PetscScalar    *p0_check,*array,*array2;
5799     PetscReal      norm;
5800     PetscInt       i;
5801 
5802     /* B0 and B0_B */
5803     if (zerodiag) {
5804       IS       dummy;
5805 
5806       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
5807       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5808       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5809       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5810     }
5811     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5812     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5813     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5814     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5815     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5816     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5817     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5818     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5819     /* S_j */
5820     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5821     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5822 
5823     /* mimic vector in \widetilde{W}_\Gamma */
5824     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5825     /* continuous in primal space */
5826     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5827     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5828     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5829     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5830     ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr);
5831     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
5832     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5833     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5834     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5835     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5836     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5837     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5838     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5839     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5840 
5841     /* assemble rhs for coarse problem */
5842     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5843     /* local with Schur */
5844     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5845     if (zerodiag) {
5846       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5847       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
5848       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5849       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5850     }
5851     /* sum on primal nodes the local contributions */
5852     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5853     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5854     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5855     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5856     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5857     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5858     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5859     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5860     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5861     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5862     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5863     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5864     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5865     /* scale primal nodes (BDDC sums contibutions) */
5866     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5867     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5868     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5869     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5870     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5871     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5872     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5873     /* global: \widetilde{B0}_B w_\Gamma */
5874     if (zerodiag) {
5875       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5876       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5877       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
5878       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5879     }
5880     /* BDDC */
5881     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5882     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5883 
5884     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5885     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5886     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5887     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5888     for (i=0;i<pcbddc->benign_n;i++) {
5889       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
5890     }
5891     ierr = PetscFree(p0_check);CHKERRQ(ierr);
5892     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5893     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5894     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5895     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5896     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5897   }
5898   PetscFunctionReturn(0);
5899 }
5900