xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 9f47a83aa3df29289204265add56bd3ef4ab9aa1)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/ksp/pc/impls/bddc/bddc.h>
3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
7 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
8 
9 /* TODO: add reuse flag */
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqAIJCompress"
12 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
13 {
14   Mat            Bt;
15   PetscScalar    *a,*bdata;
16   const PetscInt *ii,*ij;
17   PetscInt       m,n,i,nnz,*bii,*bij;
18   PetscBool      flg_row;
19   PetscErrorCode ierr;
20 
21   PetscFunctionBegin;
22   ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr);
23   ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
24   ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr);
25   nnz = n;
26   for (i=0;i<ii[n];i++) {
27     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
28   }
29   ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr);
30   ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr);
31   ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr);
32   nnz = 0;
33   bii[0] = 0;
34   for (i=0;i<n;i++) {
35     PetscInt j;
36     for (j=ii[i];j<ii[i+1];j++) {
37       PetscScalar entry = a[j];
38       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) {
39         bij[nnz] = ij[j];
40         bdata[nnz] = entry;
41         nnz++;
42       }
43     }
44     bii[i+1] = nnz;
45   }
46   ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr);
47   ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr);
48   ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr);
49   {
50     Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
51     b->free_a = PETSC_TRUE;
52     b->free_ij = PETSC_TRUE;
53   }
54   *B = Bt;
55   PetscFunctionReturn(0);
56 }
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "MatDetectDisconnectedComponents"
60 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[])
61 {
62   Mat                    B;
63   IS                     is_dummy,*cc_n;
64   ISLocalToGlobalMapping l2gmap_dummy;
65   PCBDDCGraph            graph;
66   PetscInt               i,n;
67   PetscInt               *xadj,*adjncy;
68   PetscInt               *xadj_filtered,*adjncy_filtered;
69   PetscBool              flg_row,isseqaij;
70   PetscErrorCode         ierr;
71 
72   PetscFunctionBegin;
73   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
74   if (!isseqaij && filter) {
75     PetscBool isseqdense;
76 
77     ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr);
78     if (!isseqdense) {
79       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
80     } else { /* TODO: rectangular case and LDA */
81       PetscScalar *array;
82       PetscReal   chop=1.e-6;
83 
84       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
85       ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr);
86       ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr);
87       for (i=0;i<n;i++) {
88         PetscInt j;
89         for (j=i+1;j<n;j++) {
90           PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
91           if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
92           if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
93         }
94       }
95       ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr);
96       ierr = MatConvert(B,MATSEQAIJ,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
97     }
98   } else {
99     B = A;
100   }
101   ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
102 
103   /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
104   if (filter) {
105     PetscScalar *data;
106     PetscInt    j,cum;
107 
108     ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr);
109     ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr);
110     cum = 0;
111     for (i=0;i<n;i++) {
112       PetscInt t;
113 
114       for (j=xadj[i];j<xadj[i+1];j++) {
115         if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
116           continue;
117         }
118         adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
119       }
120       t = xadj_filtered[i];
121       xadj_filtered[i] = cum;
122       cum += t;
123     }
124     ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr);
125   } else {
126     xadj_filtered = NULL;
127     adjncy_filtered = NULL;
128   }
129 
130   /* compute local connected components using PCBDDCGraph */
131   ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr);
132   ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
133   ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
134   ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
135   ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr);
136   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
137   if (xadj_filtered) {
138     graph->xadj = xadj_filtered;
139     graph->adjncy = adjncy_filtered;
140   } else {
141     graph->xadj = xadj;
142     graph->adjncy = adjncy;
143   }
144   ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
145   ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
146 
147   /* partial clean up */
148   ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr);
149   ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
150   if (A != B) {
151     ierr = MatDestroy(&B);CHKERRQ(ierr);
152   }
153 
154   /* get back data */
155   if (ncc) *ncc = graph->ncc;
156   if (cc) {
157     ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr);
158     for (i=0;i<graph->ncc;i++) {
159       ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr);
160     }
161     *cc = cc_n;
162   }
163   /* clean up graph */
164   graph->xadj = 0;
165   graph->adjncy = 0;
166   ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "PCBDDCBenignCheck"
172 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
173 {
174   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
175   PC_IS*         pcis = (PC_IS*)(pc->data);
176   IS             dirIS = NULL;
177   PetscInt       i;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
182   if (zerodiag) {
183     Mat            A;
184     Vec            vec3_N;
185     PetscScalar    *vals;
186     const PetscInt *idxs;
187     PetscInt       nz;
188 
189     /* p0 */
190     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
191     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
192     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
193     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
194     for (i=0;i<nz;i++) vals[i] = 1.;
195     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
196     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
197     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
198     /* v_I */
199     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
200     for (i=0;i<nz;i++) vals[i] = 0.;
201     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
202     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
203     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
204     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
205     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
206     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
207     if (dirIS) {
208       PetscInt n;
209 
210       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
211       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
212       for (i=0;i<n;i++) vals[i] = 0.;
213       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
214       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
215     }
216     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
217     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
218     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
219     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
220     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
221     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
222     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
223     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) {
224       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
225     }
226     ierr = PetscFree(vals);CHKERRQ(ierr);
227     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
228   }
229   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
230 
231   /* check PCBDDCBenignGetOrSetP0 */
232   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
233   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
234   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
235   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
236   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
237   for (i=0;i<pcbddc->benign_n;i++) {
238     if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) {
239       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr);
240     }
241   }
242   PetscFunctionReturn(0);
243 }
244 
245 #undef __FUNCT__
246 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
247 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
248 {
249   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
250   IS             pressures,zerodiag,*zerodiag_subs;
251   PetscInt       nz,n;
252   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag;
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
257   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
258   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
259   ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr);
260   pcbddc->benign_original_mat = pcbddc->local_mat;
261   /* if a local info on dofs is present, assumes the last field is represented by "pressures"
262      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
263      Checks if all the pressure dofs in each subdomain have a zero diagonal
264      If not, a change of basis on pressures is not needed
265      since the local Schur complements are SPD
266   */
267   has_null_pressures = PETSC_TRUE;
268   have_null = PETSC_TRUE;
269   if (pcbddc->n_ISForDofsLocal) {
270     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
271 
272     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
273     ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr);
274     ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
275     ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr);
276     ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
277     ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr);
278     if (!sorted) {
279       ierr = ISSort(pressures);CHKERRQ(ierr);
280     }
281   } else {
282     pressures = NULL;
283   }
284   ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr);
285   /* TODO: add check for shared dofs and raise error */
286   ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr);
287   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
288   if (!sorted) {
289     ierr = ISSort(zerodiag);CHKERRQ(ierr);
290   }
291   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
292   if (!nz) {
293     if (n) have_null = PETSC_FALSE;
294     has_null_pressures = PETSC_FALSE;
295     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
296   }
297   recompute_zerodiag = PETSC_FALSE;
298   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
299   zerodiag_subs = NULL;
300   pcbddc->benign_n = 0;
301   if (has_null_pressures) {
302     IS       *subs;
303     PetscInt nsubs,i;
304 
305     subs = pcbddc->local_subs;
306     nsubs = pcbddc->n_local_subs;
307     if (nsubs > 1) {
308       ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr);
309       for (i=0;i<nsubs;i++) {
310         ISLocalToGlobalMapping l2g;
311         IS                     t_zerodiag_subs;
312         PetscInt               nl;
313 
314         ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr);
315         ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr);
316         ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr);
317         if (nl) {
318           PetscBool valid = PETSC_TRUE;
319 
320           if (pressures) {
321             IS t_pressure_subs;
322             ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr);
323             ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr);
324             ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr);
325           }
326           if (valid) {
327             ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr);
328             pcbddc->benign_n++;
329           } else {
330             recompute_zerodiag = PETSC_TRUE;
331           }
332         }
333         ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr);
334         ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr);
335       }
336     } else { /* there's just one subdomain (or zero if they have not been detected */
337       PetscBool valid = PETSC_TRUE;
338       if (pressures) {
339         ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr);
340       }
341       if (valid) {
342         pcbddc->benign_n = 1;
343         ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr);
344         ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr);
345         zerodiag_subs[0] = zerodiag;
346       }
347     }
348   }
349 
350   if (!pcbddc->benign_n) {
351     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
352     recompute_zerodiag = PETSC_FALSE;
353     has_null_pressures = PETSC_FALSE;
354     have_null = PETSC_FALSE;
355   }
356 
357   /* final check for null pressures */
358   if (zerodiag && pressures) {
359     PetscInt nz,np;
360     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
361     ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr);
362     if (nz != np) have_null = PETSC_FALSE;
363   }
364 
365   if (recompute_zerodiag) {
366     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
367     if (pcbddc->benign_n == 1) {
368       ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr);
369       zerodiag = zerodiag_subs[0];
370     } else {
371       PetscInt i,nzn,*new_idxs;
372 
373       nzn = 0;
374       for (i=0;i<pcbddc->benign_n;i++) {
375         PetscInt ns;
376         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
377         nzn += ns;
378       }
379       ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr);
380       nzn = 0;
381       for (i=0;i<pcbddc->benign_n;i++) {
382         PetscInt ns,*idxs;
383         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
384         ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
385         ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr);
386         ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
387         nzn += ns;
388       }
389       ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr);
390       ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr);
391     }
392     have_null = PETSC_FALSE;
393   }
394 
395   if (has_null_pressures) {
396     IS             zerodiagc;
397     const PetscInt *idxs,*idxsc;
398     PetscInt       i,s,*nnz;
399     Mat            M;
400 
401     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
402     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
403     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
404     /* local change of basis for pressures */
405     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
406     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr);
407     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
408     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
409     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
410     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
411     for (i=0;i<pcbddc->benign_n;i++) {
412       PetscInt nzs,j;
413 
414       ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr);
415       ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
416       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
417       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
418       ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
419     }
420     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
421     ierr = PetscFree(nnz);CHKERRQ(ierr);
422     /* set identity on velocities */
423     for (i=0;i<n-nz;i++) {
424       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
425     }
426     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
427     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
428     ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
429     ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr);
430     /* set change on pressures */
431     for (s=0;s<pcbddc->benign_n;s++) {
432       PetscScalar *array;
433       PetscInt    nzs;
434 
435       ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr);
436       ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
437       for (i=0;i<nzs-1;i++) {
438         PetscScalar vals[2];
439         PetscInt    cols[2];
440 
441         cols[0] = idxs[i];
442         cols[1] = idxs[nzs-1];
443         vals[0] = 1.;
444         vals[1] = 1.;
445         ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
446       }
447       ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr);
448       for (i=0;i<nzs-1;i++) array[i] = -1.;
449       array[nzs-1] = 1.;
450       ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
451       /* store local idxs for p0 */
452       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
453       ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
454       ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr);
455       ierr = PetscFree(array);CHKERRQ(ierr);
456     }
457     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459     /* TODO: need optimization? */
460     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
461     ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
462     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
463     ierr = MatDestroy(&M);CHKERRQ(ierr);
464     /* store global idxs for p0 */
465     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
466   }
467   ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr);
468   ierr = ISDestroy(&pressures);CHKERRQ(ierr);
469 
470   /* determines if the coarse solver will be singular or not */
471   ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
472   *zerodiaglocal = zerodiag;
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
478 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
479 {
480   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
481   PetscErrorCode ierr;
482 
483   PetscFunctionBegin;
484   if (!pcbddc->benign_sf) {
485     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
486     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
487   }
488   if (get) { /* use SF to get values */
489     PetscScalar *array;
490 
491     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
492     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
493     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
494     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
495   } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */
496     ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
497     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
498     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
499   }
500   PetscFunctionReturn(0);
501 }
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
505 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
506 {
507   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
508   PetscErrorCode ierr;
509 
510   PetscFunctionBegin;
511   /* TODO: add error checking
512     - avoid nested pop (or push) calls.
513     - cannot push before pop.
514     - cannot call this if pcbddc->local_mat is NULL
515   */
516   if (!pcbddc->benign_n) {
517     PetscFunctionReturn(0);
518   }
519   if (pop) {
520     IS       is_p0;
521     MatReuse reuse;
522 
523     /* extract B_0 */
524     reuse = MAT_INITIAL_MATRIX;
525     if (pcbddc->benign_B0) {
526       reuse = MAT_REUSE_MATRIX;
527     }
528     ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr);
529     ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr);
530     /* remove rows and cols from local problem */
531     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
532     ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr);
533     ierr = ISDestroy(&is_p0);CHKERRQ(ierr);
534   } else { /* push */
535     PetscInt i;
536 
537     for (i=0;i<pcbddc->benign_n;i++) {
538       PetscScalar *B0_vals;
539       PetscInt    *B0_cols,B0_ncol;
540 
541       ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
542       ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
543       ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
544       ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr);
545       ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
546     }
547     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
548     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549   }
550   PetscFunctionReturn(0);
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "PCBDDCAdaptiveSelection"
555 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
556 {
557   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
558   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
559   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
560   PetscBLASInt    *B_iwork,*B_ifail;
561   PetscScalar     *work,lwork;
562   PetscScalar     *St,*S,*eigv;
563   PetscScalar     *Sarray,*Starray;
564   PetscReal       *eigs,thresh;
565   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
566   PetscBool       allocated_S_St;
567 #if defined(PETSC_USE_COMPLEX)
568   PetscReal       *rwork;
569 #endif
570   PetscErrorCode  ierr;
571 
572   PetscFunctionBegin;
573   if (!sub_schurs->use_mumps) {
574     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
575   }
576 
577   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
578     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
579   }
580 
581   if (pcbddc->dbg_flag) {
582     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
583     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
584     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
585     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
586   }
587 
588   if (pcbddc->dbg_flag) {
589     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
590   }
591 
592   /* max size of subsets */
593   mss = 0;
594   for (i=0;i<sub_schurs->n_subs;i++) {
595     PetscInt subset_size;
596 
597     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
598     mss = PetscMax(mss,subset_size);
599   }
600 
601   /* min/max and threshold */
602   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
603   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
604   nmax = PetscMax(nmin,nmax);
605   allocated_S_St = PETSC_FALSE;
606   if (nmin) {
607     allocated_S_St = PETSC_TRUE;
608   }
609 
610   /* allocate lapack workspace */
611   cum = cum2 = 0;
612   maxneigs = 0;
613   for (i=0;i<sub_schurs->n_subs;i++) {
614     PetscInt n,subset_size;
615 
616     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
617     n = PetscMin(subset_size,nmax);
618     cum += subset_size;
619     cum2 += subset_size*n;
620     maxneigs = PetscMax(maxneigs,n);
621   }
622   if (mss) {
623     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
624       PetscBLASInt B_itype = 1;
625       PetscBLASInt B_N = mss;
626       PetscReal    zero = 0.0;
627       PetscReal    eps = 0.0; /* dlamch? */
628 
629       B_lwork = -1;
630       S = NULL;
631       St = NULL;
632       eigs = NULL;
633       eigv = NULL;
634       B_iwork = NULL;
635       B_ifail = NULL;
636 #if defined(PETSC_USE_COMPLEX)
637       rwork = NULL;
638 #endif
639       thresh = 1.0;
640       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
641 #if defined(PETSC_USE_COMPLEX)
642       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
643 #else
644       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
645 #endif
646       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
647       ierr = PetscFPTrapPop();CHKERRQ(ierr);
648     } else {
649         /* TODO */
650     }
651   } else {
652     lwork = 0;
653   }
654 
655   nv = 0;
656   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
657     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
658   }
659   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
660   if (allocated_S_St) {
661     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
662   }
663   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
664 #if defined(PETSC_USE_COMPLEX)
665   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
666 #endif
667   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
668                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
669                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
670                       nv+cum,&pcbddc->adaptive_constraints_idxs,
671                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
672   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
673 
674   maxneigs = 0;
675   cum = cum2 = cumarray = 0;
676   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
677   pcbddc->adaptive_constraints_data_ptr[0] = 0;
678   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
679     const PetscInt *idxs;
680 
681     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
682     for (cum=0;cum<nv;cum++) {
683       pcbddc->adaptive_constraints_n[cum] = 1;
684       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
685       pcbddc->adaptive_constraints_data[cum] = 1.0;
686       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
687       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
688     }
689     cum2 = cum;
690     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
691   }
692 
693   if (mss) { /* multilevel */
694     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
695     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
696   }
697 
698   for (i=0;i<sub_schurs->n_subs;i++) {
699 
700     const PetscInt *idxs;
701     PetscReal      infty = PETSC_MAX_REAL;
702     PetscInt       j,subset_size,eigs_start = 0;
703     PetscBLASInt   B_N;
704     PetscBool      same_data = PETSC_FALSE;
705 
706     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
707     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
708     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
709       if (sub_schurs->is_hermitian) {
710         PetscInt j,k;
711         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
712           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
713           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
714         }
715         for (j=0;j<subset_size;j++) {
716           for (k=j;k<subset_size;k++) {
717             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
718             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
719           }
720         }
721       } else {
722         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
723         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
724       }
725     } else {
726       S = Sarray + cumarray;
727       St = Starray + cumarray;
728     }
729 
730     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
731     /* see if we can save some work */
732     if (sub_schurs->n_subs == 1) {
733       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
734     }
735 
736     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
737       B_neigs = 0;
738     } else {
739       /* Threshold: this is an heuristic for edges */
740       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
741       if (nmin) {
742         Mat SM,StM;
743         PetscInt j,k,nccs,nccst;
744 
745         for (j=0;j<subset_size;j++) {
746           for (k=j;k<subset_size;k++) {
747             S [k*subset_size+j] = S [j*subset_size+k];
748             St[k*subset_size+j] = St[j*subset_size+k];
749           }
750         }
751         ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,S,&SM);CHKERRQ(ierr);
752         ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,St,&StM);CHKERRQ(ierr);
753         ierr = MatDetectDisconnectedComponents(SM,PETSC_TRUE,&nccs,NULL);CHKERRQ(ierr);
754         ierr = MatDetectDisconnectedComponents(StM,PETSC_TRUE,&nccst,NULL);CHKERRQ(ierr);
755         if (nccs != 1 || nccst != 1) {
756           PetscPrintf(PETSC_COMM_SELF,"[%d] Found disc %d %d (size %d)\n",PetscGlobalRank,nccs,nccst,subset_size);
757         }
758         ierr = MatDestroy(&SM);CHKERRQ(ierr);
759         ierr = MatDestroy(&StM);CHKERRQ(ierr);
760       }
761 
762       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
763         PetscBLASInt B_itype = 1;
764         PetscBLASInt B_IL, B_IU;
765         PetscReal    eps = -1.0; /* dlamch? */
766         PetscInt     nmin_s;
767 
768         if (pcbddc->dbg_flag) {
769           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
770         }
771 
772         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
773         if (thresh > 1.+PETSC_SMALL) {
774 
775           /* ask for eigenvalues larger than thresh */
776 #if defined(PETSC_USE_COMPLEX)
777           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
778 #else
779           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
780 #endif
781         } else {
782           B_IU = PetscMax(1,PetscMin(B_N,nmax));
783           B_IL = 1;
784 #if defined(PETSC_USE_COMPLEX)
785           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
786 #else
787           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
788 #endif
789         }
790         ierr = PetscFPTrapPop();CHKERRQ(ierr);
791         if (B_ierr) {
792           if (B_ierr < 0 ) {
793             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
794           } else if (B_ierr <= B_N) {
795             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
796           } else {
797             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
798           }
799         }
800 
801         if (B_neigs > nmax) {
802           if (pcbddc->dbg_flag) {
803             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
804           }
805           eigs_start = B_neigs -nmax;
806           B_neigs = nmax;
807         }
808 
809         nmin_s = PetscMin(nmin,B_N);
810         if (B_neigs < nmin_s) {
811           PetscBLASInt B_neigs2;
812 
813           B_IU = B_N - B_neigs;
814           B_IL = B_N - nmin_s + 1;
815           if (pcbddc->dbg_flag) {
816             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
817           }
818           if (sub_schurs->is_hermitian) {
819             PetscInt j;
820             for (j=0;j<subset_size;j++) {
821               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
822             }
823             for (j=0;j<subset_size;j++) {
824               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
825             }
826           } else {
827             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
828             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
829           }
830           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
831 #if defined(PETSC_USE_COMPLEX)
832           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
833 #else
834           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
835 #endif
836           ierr = PetscFPTrapPop();CHKERRQ(ierr);
837           B_neigs += B_neigs2;
838         }
839         if (B_ierr) {
840           if (B_ierr < 0 ) {
841             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
842           } else if (B_ierr <= B_N) {
843             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
844           } else {
845             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
846           }
847         }
848         if (pcbddc->dbg_flag) {
849           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
850           for (j=0;j<B_neigs;j++) {
851             if (eigs[j] == 0.0) {
852               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
853             } else {
854               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
855             }
856           }
857         }
858       } else {
859           /* TODO */
860       }
861     }
862     maxneigs = PetscMax(B_neigs,maxneigs);
863     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
864     if (B_neigs) {
865       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
866 
867       if (pcbddc->dbg_flag > 1) {
868         PetscInt ii;
869         for (ii=0;ii<B_neigs;ii++) {
870           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
871           for (j=0;j<B_N;j++) {
872 #if defined(PETSC_USE_COMPLEX)
873             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
874             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
875             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
876 #else
877             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
878 #endif
879           }
880         }
881       }
882 #if 0
883       for (j=0;j<B_neigs;j++) {
884         PetscBLASInt Blas_N,Blas_one = 1.0;
885         PetscScalar norm;
886         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
887         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
888                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
889         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
890           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
891         } else {
892           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
893         }
894         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
895       }
896 #endif
897       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
898       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
899       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
900       cum++;
901     }
902     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
903     /* shift for next computation */
904     cumarray += subset_size*subset_size;
905   }
906   if (pcbddc->dbg_flag) {
907     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
908   }
909 
910   if (mss) {
911     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
912     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
913     /* destroy matrices (junk) */
914     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
915     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
916   }
917   if (allocated_S_St) {
918     ierr = PetscFree2(S,St);CHKERRQ(ierr);
919   }
920   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
921 #if defined(PETSC_USE_COMPLEX)
922   ierr = PetscFree(rwork);CHKERRQ(ierr);
923 #endif
924   if (pcbddc->dbg_flag) {
925     PetscInt maxneigs_r;
926     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
927     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
928   }
929   PetscFunctionReturn(0);
930 }
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "PCBDDCSetUpSolvers"
934 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
935 {
936   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
937   PetscScalar    *coarse_submat_vals;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   /* Setup local scatters R_to_B and (optionally) R_to_D */
942   /* PCBDDCSetUpLocalWorkVectors should be called first! */
943   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
944 
945   /* Setup local neumann solver ksp_R */
946   /* PCBDDCSetUpLocalScatters should be called first! */
947   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
948 
949   /* Change global null space passed in by the user if change of basis has been requested */
950   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
951     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
952   }
953 
954   /*
955      Setup local correction and local part of coarse basis.
956      Gives back the dense local part of the coarse matrix in column major ordering
957   */
958   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
959 
960   /* Compute total number of coarse nodes and setup coarse solver */
961   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
962 
963   /* free */
964   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "PCBDDCResetCustomization"
970 PetscErrorCode PCBDDCResetCustomization(PC pc)
971 {
972   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
977   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
978   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
979   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
980   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
981   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
982   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
983   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
984   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
985   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
986   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "PCBDDCResetTopography"
992 PetscErrorCode PCBDDCResetTopography(PC pc)
993 {
994   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
995   PetscInt       i;
996   PetscErrorCode ierr;
997 
998   PetscFunctionBegin;
999   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
1000   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1001   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1002   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
1003   for (i=0;i<pcbddc->n_local_subs;i++) {
1004     ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr);
1005   }
1006   ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr);
1007   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "PCBDDCResetSolvers"
1013 PetscErrorCode PCBDDCResetSolvers(PC pc)
1014 {
1015   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1016   PetscScalar    *array;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
1021   if (pcbddc->coarse_phi_B) {
1022     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
1023     ierr = PetscFree(array);CHKERRQ(ierr);
1024   }
1025   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1026   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1027   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1028   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1029   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1030   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1031   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1032   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1033   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1034   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1035   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1036   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1037   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1038   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
1039   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
1040   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
1041   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
1042   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1043   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1044   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1045   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
1046   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
1047   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
1048   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
1049   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
1050   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
1051   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
1052   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
1053   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
1054   ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
1060 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
1061 {
1062   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1063   PC_IS          *pcis = (PC_IS*)pc->data;
1064   VecType        impVecType;
1065   PetscInt       n_constraints,n_R,old_size;
1066   PetscErrorCode ierr;
1067 
1068   PetscFunctionBegin;
1069   if (!pcbddc->ConstraintMatrix) {
1070     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
1071   }
1072   /* get sizes */
1073   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
1074   n_R = pcis->n - pcbddc->n_vertices;
1075   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
1076   /* local work vectors (try to avoid unneeded work)*/
1077   /* R nodes */
1078   old_size = -1;
1079   if (pcbddc->vec1_R) {
1080     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
1081   }
1082   if (n_R != old_size) {
1083     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
1084     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
1085     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
1086     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
1087     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
1088     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
1089   }
1090   /* local primal dofs */
1091   old_size = -1;
1092   if (pcbddc->vec1_P) {
1093     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
1094   }
1095   if (pcbddc->local_primal_size != old_size) {
1096     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
1097     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
1098     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
1099     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
1100   }
1101   /* local explicit constraints */
1102   old_size = -1;
1103   if (pcbddc->vec1_C) {
1104     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
1105   }
1106   if (n_constraints && n_constraints != old_size) {
1107     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1108     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
1109     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
1110     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
1111   }
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "PCBDDCSetUpCorrection"
1117 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
1118 {
1119   PetscErrorCode  ierr;
1120   /* pointers to pcis and pcbddc */
1121   PC_IS*          pcis = (PC_IS*)pc->data;
1122   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1123   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1124   /* submatrices of local problem */
1125   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
1126   /* submatrices of benign trick */
1127   Mat             B0_V = NULL;
1128   /* submatrices of local coarse problem */
1129   Mat             S_VV,S_CV,S_VC,S_CC;
1130   /* working matrices */
1131   Mat             C_CR;
1132   /* additional working stuff */
1133   PC              pc_R;
1134   Mat             F;
1135   PetscBool       isLU,isCHOL,isILU;
1136 
1137   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
1138   PetscScalar     *work;
1139   PetscInt        *idx_V_B;
1140   PetscInt        n,n_vertices,n_constraints,*p0_lidx_I;
1141   PetscInt        i,n_R,n_D,n_B;
1142   PetscBool       unsymmetric_check;
1143   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
1144   MatType         impMatType;
1145   /* some shortcuts to scalars */
1146   PetscScalar     one=1.0,m_one=-1.0;
1147 
1148   PetscFunctionBegin;
1149   n_vertices = pcbddc->n_vertices;
1150   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
1151   /* Set Non-overlapping dimensions */
1152   n_B = pcis->n_B;
1153   n_D = pcis->n - n_B;
1154   n_R = pcis->n - n_vertices;
1155 
1156   /* Set types for local objects needed by BDDC precondtioner */
1157   impMatType = MATSEQDENSE;
1158 
1159   /* vertices in boundary numbering */
1160   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
1161   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
1162   if (i != n_vertices) {
1163     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
1164   }
1165 
1166   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
1167   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
1168   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
1169   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
1170   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
1171   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
1172   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
1173   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
1174   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
1175   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
1176 
1177   unsymmetric_check = PETSC_FALSE;
1178   /* allocate workspace */
1179   n = 0;
1180   if (n_constraints) {
1181     n += n_R*n_constraints;
1182   }
1183   if (n_vertices) {
1184     n = PetscMax(2*n_R*n_vertices,n);
1185     n = PetscMax((n_R+n_B)*n_vertices,n);
1186   }
1187   if (!pcbddc->symmetric_primal) {
1188     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
1189     unsymmetric_check = PETSC_TRUE;
1190   }
1191   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1192 
1193   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
1194   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
1195   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
1196   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
1197   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
1198   if (isLU || isILU || isCHOL) {
1199     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
1200   } else if (sub_schurs->reuse_mumps) {
1201     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1202     MatFactorType type;
1203 
1204     F = reuse_mumps->F;
1205     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
1206     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
1207   } else {
1208     F = NULL;
1209   }
1210 
1211   /* Precompute stuffs needed for preprocessing and application of BDDC*/
1212   if (n_constraints) {
1213     Mat         M1,M2,M3;
1214     Mat         auxmat;
1215     IS          is_aux;
1216     PetscScalar *array,*array2;
1217 
1218     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1219     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1220 
1221     /* Extract constraints on R nodes: C_{CR}  */
1222     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
1223     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
1224     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
1225 
1226     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
1227     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
1228     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1229     for (i=0;i<n_constraints;i++) {
1230       const PetscScalar *row_cmat_values;
1231       const PetscInt    *row_cmat_indices;
1232       PetscInt          size_of_constraint,j;
1233 
1234       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1235       for (j=0;j<size_of_constraint;j++) {
1236         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
1237       }
1238       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1239     }
1240     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
1241     if (F) {
1242       Mat B;
1243 
1244       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1245       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
1246       ierr = MatDestroy(&B);CHKERRQ(ierr);
1247     } else {
1248       PetscScalar *marr;
1249 
1250       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1251       for (i=0;i<n_constraints;i++) {
1252         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1253         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
1254         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1255         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1256         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1257       }
1258       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1259     }
1260     if (!pcbddc->switch_static) {
1261       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1262       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1263       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1264       for (i=0;i<n_constraints;i++) {
1265         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
1266         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
1267         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1270         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1271       }
1272       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1273       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1274       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1275     } else {
1276       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
1277       pcbddc->local_auxmat2 = local_auxmat2_R;
1278       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1279     }
1280     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1281     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
1282     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
1283     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
1284     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
1285     if (isCHOL) {
1286       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
1287     } else {
1288       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
1289     }
1290     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
1291     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
1292     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
1293     ierr = MatDestroy(&M2);CHKERRQ(ierr);
1294     ierr = MatDestroy(&M3);CHKERRQ(ierr);
1295     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
1296     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
1297     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
1298     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
1299     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1300   }
1301   /* Get submatrices from subdomain matrix */
1302   if (pcbddc->benign_n) {
1303     IS        dummy;
1304     Mat       B0_R;
1305     PetscReal norm;
1306 
1307     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1308     ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
1309     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
1310     if (norm > PETSC_SMALL) {
1311       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
1312     }
1313     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1314     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1315   }
1316 
1317   if (n_vertices) {
1318     IS is_aux;
1319 
1320     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
1321       IS tis;
1322 
1323       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
1324       ierr = ISSort(tis);CHKERRQ(ierr);
1325       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
1326       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1327     } else {
1328       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
1329     }
1330     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
1331     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
1332     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
1333     if (pcbddc->benign_n) {
1334       IS dummy;
1335 
1336       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1337       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1338       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1339     }
1340     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1341   }
1342 
1343   /* Matrix of coarse basis functions (local) */
1344   if (pcbddc->coarse_phi_B) {
1345     PetscInt on_B,on_primal,on_D=n_D;
1346     if (pcbddc->coarse_phi_D) {
1347       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1348     }
1349     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1350     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1351       PetscScalar *marray;
1352 
1353       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1354       ierr = PetscFree(marray);CHKERRQ(ierr);
1355       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1356       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1357       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1358       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1359     }
1360   }
1361 
1362   if (!pcbddc->coarse_phi_B) {
1363     PetscScalar *marray;
1364 
1365     n = n_B*pcbddc->local_primal_size;
1366     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1367       n += n_D*pcbddc->local_primal_size;
1368     }
1369     if (!pcbddc->symmetric_primal) {
1370       n *= 2;
1371     }
1372     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1373     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1374     n = n_B*pcbddc->local_primal_size;
1375     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1376       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1377       n += n_D*pcbddc->local_primal_size;
1378     }
1379     if (!pcbddc->symmetric_primal) {
1380       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1381       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1382         n = n_B*pcbddc->local_primal_size;
1383         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1384       }
1385     } else {
1386       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1387       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1388       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1389         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1390         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1391       }
1392     }
1393   }
1394 
1395   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1396   p0_lidx_I = NULL;
1397   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1398     const PetscInt *idxs;
1399 
1400     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1401     ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr);
1402     for (i=0;i<pcbddc->benign_n;i++) {
1403       ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr);
1404     }
1405     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1406   }
1407 
1408   /* vertices */
1409   if (n_vertices) {
1410 
1411     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1412 
1413     if (n_R) {
1414       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1415       PetscBLASInt B_N,B_one = 1;
1416       PetscScalar  *x,*y;
1417       PetscBool    isseqaij;
1418 
1419       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1420       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1421       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1422       if (F) { /* TODO could be optimized for symmetric problems */
1423         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1424       } else {
1425         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1426         for (i=0;i<n_vertices;i++) {
1427           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
1428           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1429           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1430           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1431           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1432         }
1433         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1434       }
1435       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1436       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
1437       if (n_constraints) {
1438         Mat B;
1439 
1440         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1441         for (i=0;i<n_vertices;i++) {
1442           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1443           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
1444           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1445           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1446           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1447           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1448         }
1449         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1450         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1451         ierr = MatDestroy(&B);CHKERRQ(ierr);
1452         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1453         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1454         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1455         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
1456         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
1457         ierr = MatDestroy(&B);CHKERRQ(ierr);
1458       }
1459       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1460       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1461         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1462       }
1463       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1464       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1465       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1466       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1467       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1468       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1469       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1470       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1471       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1472       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1473     } else {
1474       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1475     }
1476     for (i=0;i<pcbddc->benign_n;i++) {
1477       const PetscScalar *vals;
1478       const PetscInt    *idxs;
1479       PetscInt          n,j,primal_idx;
1480 
1481       ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1482       primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i;
1483       for (j=0;j<n;j++) {
1484         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j];
1485         coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j];
1486       }
1487       ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1488     }
1489     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1490 
1491     /* coarse basis functions */
1492     for (i=0;i<n_vertices;i++) {
1493       PetscScalar *y;
1494 
1495       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1496       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1497       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1498       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1499       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1500       y[n_B*i+idx_V_B[i]] = 1.0;
1501       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1502       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1503 
1504       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1505         PetscInt j;
1506 
1507         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1508         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1509         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1512         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1513         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1514       }
1515       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1516     }
1517     /* if n_R == 0 the object is not destroyed */
1518     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1519   }
1520 
1521   if (n_constraints) {
1522     Mat B;
1523 
1524     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1525     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1526     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1527     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1528     if (n_vertices) {
1529       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1530         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1531       } else {
1532         Mat S_VCt;
1533 
1534         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1535         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1536         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1537       }
1538     }
1539     ierr = MatDestroy(&B);CHKERRQ(ierr);
1540     /* coarse basis functions */
1541     for (i=0;i<n_constraints;i++) {
1542       PetscScalar *y;
1543 
1544       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1545       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1546       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1547       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1549       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1550       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1551       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1552         PetscInt j;
1553 
1554         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1555         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1556         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1559         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1560         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1561       }
1562       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1563     }
1564   }
1565   if (n_constraints) {
1566     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1567   }
1568   ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr);
1569   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1570 
1571   /* compute other basis functions for non-symmetric problems */
1572   if (!pcbddc->symmetric_primal) {
1573 
1574     if (n_constraints) {
1575       Mat S_CCT,B_C;
1576 
1577       /* this is a lazy thing */
1578       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1579       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1580       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1581       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1582       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1583       if (n_vertices) {
1584         Mat B_V,S_VCT;
1585 
1586         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1587         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1588         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1589         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1590         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1591       }
1592       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1593     } else { /* if there are no constraints, reset work */
1594       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1595     }
1596     if (n_vertices && n_R) {
1597       Mat          A_VRT;
1598       PetscScalar  *marray;
1599       PetscBLASInt B_N,B_one = 1;
1600 
1601       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1602       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1603       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1604       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1605       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1606       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1607       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1608     }
1609 
1610     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1611       for (i=0;i<pcbddc->local_primal_size;i++) {
1612         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1613         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1614         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1615         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1616         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1617       }
1618     } else {
1619       for (i=0;i<pcbddc->local_primal_size;i++) {
1620         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1621         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1622         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1623         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1624         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1625       }
1626     }
1627     /* coarse basis functions */
1628     for (i=0;i<pcbddc->local_primal_size;i++) {
1629       PetscScalar *y;
1630 
1631       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1632       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1633       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1634       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1635       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1636       if (i<n_vertices) {
1637         y[n_B*i+idx_V_B[i]] = 1.0;
1638       }
1639       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1640       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1641 
1642       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1643         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1644         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1645         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1647         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1648         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1649       }
1650       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1651     }
1652   }
1653   /* free memory */
1654   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1655   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1656   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1657   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1658   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1659   ierr = PetscFree(work);CHKERRQ(ierr);
1660   if (n_vertices) {
1661     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1662   }
1663   if (n_constraints) {
1664     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1665   }
1666   /* Checking coarse_sub_mat and coarse basis functios */
1667   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1668   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1669   if (pcbddc->dbg_flag) {
1670     Mat         coarse_sub_mat;
1671     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1672     Mat         coarse_phi_D,coarse_phi_B;
1673     Mat         coarse_psi_D,coarse_psi_B;
1674     Mat         A_II,A_BB,A_IB,A_BI;
1675     Mat         C_B,CPHI;
1676     IS          is_dummy;
1677     Vec         mones;
1678     MatType     checkmattype=MATSEQAIJ;
1679     PetscReal   real_value;
1680 
1681     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1682     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1683     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1684     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1685     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1686     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1687     if (unsymmetric_check) {
1688       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1689       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1690     }
1691     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1692 
1693     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1694     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1695     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1696     if (unsymmetric_check) {
1697       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1698       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1699       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1700       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1701       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1702       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1703       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1704       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1705       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1706       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1707       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1708       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1709     } else {
1710       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1711       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1712       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1713       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1714       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1715       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1716       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1717       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1718     }
1719     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1720     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1721     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1722     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1723     if (pcbddc->benign_n) {
1724       Mat         B0_I,B0_B,B0_BPHI,B0_IPHI;
1725       PetscScalar *data,*data2;
1726       PetscInt    j;
1727 
1728       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
1729       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1730       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1731       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1732       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1733       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1734       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1735       for (j=0;j<pcbddc->benign_n;j++) {
1736         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
1737         for (i=0;i<pcbddc->local_primal_size;i++) {
1738           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
1739           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
1740         }
1741       }
1742       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1743       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1744       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1745       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1746       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1747       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1748       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1749       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1750       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1751     }
1752 #if 0
1753   {
1754     PetscViewer viewer;
1755     char filename[256];
1756     sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank);
1757     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1758     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1759     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1760     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1761   }
1762 #endif
1763     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1764     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1765     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1766     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1767 
1768     /* check constraints */
1769     if (!pcbddc->benign_n) { /* TODO: add benign case */
1770       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1771       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1772       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1773       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1774       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1775       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1776       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1777       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1778       if (unsymmetric_check) {
1779         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1780         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1781         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1782         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1783         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1784       }
1785       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1786       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1787       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1788       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1789     }
1790     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1791     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1792     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1793     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1794     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1795     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1796     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1797     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1798     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1799     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1800     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1801     if (unsymmetric_check) {
1802       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1803       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1804     }
1805     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1806   }
1807   /* get back data */
1808   *coarse_submat_vals_n = coarse_submat_vals;
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1814 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1815 {
1816   Mat            *work_mat;
1817   IS             isrow_s,iscol_s;
1818   PetscBool      rsorted,csorted;
1819   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1820   PetscErrorCode ierr;
1821 
1822   PetscFunctionBegin;
1823   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1824   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1825   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1826   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1827 
1828   if (!rsorted) {
1829     const PetscInt *idxs;
1830     PetscInt *idxs_sorted,i;
1831 
1832     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1833     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1834     for (i=0;i<rsize;i++) {
1835       idxs_perm_r[i] = i;
1836     }
1837     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1838     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1839     for (i=0;i<rsize;i++) {
1840       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1841     }
1842     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1843     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1844   } else {
1845     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1846     isrow_s = isrow;
1847   }
1848 
1849   if (!csorted) {
1850     if (isrow == iscol) {
1851       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1852       iscol_s = isrow_s;
1853     } else {
1854       const PetscInt *idxs;
1855       PetscInt *idxs_sorted,i;
1856 
1857       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1858       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1859       for (i=0;i<csize;i++) {
1860         idxs_perm_c[i] = i;
1861       }
1862       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1863       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1864       for (i=0;i<csize;i++) {
1865         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1866       }
1867       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1868       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1869     }
1870   } else {
1871     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1872     iscol_s = iscol;
1873   }
1874 
1875   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1876 
1877   if (!rsorted || !csorted) {
1878     Mat      new_mat;
1879     IS       is_perm_r,is_perm_c;
1880 
1881     if (!rsorted) {
1882       PetscInt *idxs_r,i;
1883       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1884       for (i=0;i<rsize;i++) {
1885         idxs_r[idxs_perm_r[i]] = i;
1886       }
1887       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1888       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1889     } else {
1890       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1891     }
1892     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1893 
1894     if (!csorted) {
1895       if (isrow_s == iscol_s) {
1896         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1897         is_perm_c = is_perm_r;
1898       } else {
1899         PetscInt *idxs_c,i;
1900         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1901         for (i=0;i<csize;i++) {
1902           idxs_c[idxs_perm_c[i]] = i;
1903         }
1904         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1905         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1906       }
1907     } else {
1908       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1909     }
1910     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1911 
1912     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1913     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1914     work_mat[0] = new_mat;
1915     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1916     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1917   }
1918 
1919   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1920   *B = work_mat[0];
1921   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1922   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1923   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1929 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1930 {
1931   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1932   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1933   Mat            new_mat;
1934   IS             is_local,is_global;
1935   PetscInt       local_size;
1936   PetscBool      isseqaij;
1937   PetscErrorCode ierr;
1938 
1939   PetscFunctionBegin;
1940   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1941   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1942   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1943   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1944   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1945   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1946   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1947 
1948   /* check */
1949   if (pcbddc->dbg_flag) {
1950     Vec       x,x_change;
1951     PetscReal error;
1952 
1953     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1954     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1955     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1956     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1957     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1958     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1959     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1960     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1961     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1962     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1963     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1964     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1965     ierr = VecDestroy(&x);CHKERRQ(ierr);
1966     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1967   }
1968 
1969   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1970   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1971   if (isseqaij) {
1972     Mat M;
1973 
1974     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
1975     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
1976     ierr = MatDestroy(&M);CHKERRQ(ierr);
1977   } else {
1978     Mat work_mat,M;
1979 
1980     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1981     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr);
1982     ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr);
1983     ierr = MatDestroy(&M);CHKERRQ(ierr);
1984     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1985   }
1986   if (matis->A->symmetric_set) {
1987     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1988 #if !defined(PETSC_USE_COMPLEX)
1989     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1990 #endif
1991   }
1992   /*
1993   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1994   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1995   */
1996   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
2002 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
2003 {
2004   PC_IS*          pcis = (PC_IS*)(pc->data);
2005   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
2006   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2007   PetscInt        *idx_R_local=NULL;
2008   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
2009   PetscInt        vbs,bs;
2010   PetscBT         bitmask=NULL;
2011   PetscErrorCode  ierr;
2012 
2013   PetscFunctionBegin;
2014   /*
2015     No need to setup local scatters if
2016       - primal space is unchanged
2017         AND
2018       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
2019         AND
2020       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
2021   */
2022   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
2023     PetscFunctionReturn(0);
2024   }
2025   /* destroy old objects */
2026   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
2027   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
2028   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
2029   /* Set Non-overlapping dimensions */
2030   n_B = pcis->n_B;
2031   n_D = pcis->n - n_B;
2032   n_vertices = pcbddc->n_vertices;
2033 
2034   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
2035 
2036   /* create auxiliary bitmask and allocate workspace */
2037   if (!sub_schurs->reuse_mumps) {
2038     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
2039     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
2040     for (i=0;i<n_vertices;i++) {
2041       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
2042     }
2043 
2044     for (i=0, n_R=0; i<pcis->n; i++) {
2045       if (!PetscBTLookup(bitmask,i)) {
2046         idx_R_local[n_R++] = i;
2047       }
2048     }
2049   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
2050     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2051 
2052     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2053     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
2054   }
2055 
2056   /* Block code */
2057   vbs = 1;
2058   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
2059   if (bs>1 && !(n_vertices%bs)) {
2060     PetscBool is_blocked = PETSC_TRUE;
2061     PetscInt  *vary;
2062     if (!sub_schurs->reuse_mumps) {
2063       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
2064       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
2065       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
2066       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
2067       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
2068       for (i=0; i<pcis->n/bs; i++) {
2069         if (vary[i]!=0 && vary[i]!=bs) {
2070           is_blocked = PETSC_FALSE;
2071           break;
2072         }
2073       }
2074       ierr = PetscFree(vary);CHKERRQ(ierr);
2075     } else {
2076       /* Verify directly the R set */
2077       for (i=0; i<n_R/bs; i++) {
2078         PetscInt j,node=idx_R_local[bs*i];
2079         for (j=1; j<bs; j++) {
2080           if (node != idx_R_local[bs*i+j]-j) {
2081             is_blocked = PETSC_FALSE;
2082             break;
2083           }
2084         }
2085       }
2086     }
2087     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
2088       vbs = bs;
2089       for (i=0;i<n_R/vbs;i++) {
2090         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
2091       }
2092     }
2093   }
2094   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
2095   if (sub_schurs->reuse_mumps) {
2096     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2097 
2098     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2099     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
2100     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
2101     reuse_mumps->is_R = pcbddc->is_R_local;
2102   } else {
2103     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
2104   }
2105 
2106   /* print some info if requested */
2107   if (pcbddc->dbg_flag) {
2108     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2109     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2110     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2111     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
2112     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
2113     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr);
2114     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2115   }
2116 
2117   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
2118   if (!sub_schurs->reuse_mumps) {
2119     IS       is_aux1,is_aux2;
2120     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
2121 
2122     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2123     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
2124     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
2125     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2126     for (i=0; i<n_D; i++) {
2127       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
2128     }
2129     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2130     for (i=0, j=0; i<n_R; i++) {
2131       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
2132         aux_array1[j++] = i;
2133       }
2134     }
2135     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2136     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2137     for (i=0, j=0; i<n_B; i++) {
2138       if (!PetscBTLookup(bitmask,is_indices[i])) {
2139         aux_array2[j++] = i;
2140       }
2141     }
2142     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2143     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
2144     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
2145     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2146     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
2147 
2148     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2149       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
2150       for (i=0, j=0; i<n_R; i++) {
2151         if (PetscBTLookup(bitmask,idx_R_local[i])) {
2152           aux_array1[j++] = i;
2153         }
2154       }
2155       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2156       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2157       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2158     }
2159     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
2160     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2161   } else {
2162     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2163     IS               tis;
2164     PetscInt         schur_size;
2165 
2166     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
2167     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
2168     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
2169     ierr = ISDestroy(&tis);CHKERRQ(ierr);
2170     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2171       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
2172       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2173       ierr = ISDestroy(&tis);CHKERRQ(ierr);
2174     }
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
2182 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
2183 {
2184   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2185   PC_IS          *pcis = (PC_IS*)pc->data;
2186   PC             pc_temp;
2187   Mat            A_RR;
2188   MatReuse       reuse;
2189   PetscScalar    m_one = -1.0;
2190   PetscReal      value;
2191   PetscInt       n_D,n_R;
2192   PetscBool      use_exact,use_exact_reduced,issbaij;
2193   PetscErrorCode ierr;
2194   /* prefixes stuff */
2195   char           dir_prefix[256],neu_prefix[256],str_level[16];
2196   size_t         len;
2197 
2198   PetscFunctionBegin;
2199 
2200   /* compute prefixes */
2201   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
2202   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
2203   if (!pcbddc->current_level) {
2204     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2205     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2206     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2207     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2208   } else {
2209     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2210     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
2211     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2212     len -= 15; /* remove "pc_bddc_coarse_" */
2213     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
2214     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
2215     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2216     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2217     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2218     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2219     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
2220     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
2221   }
2222 
2223   /* DIRICHLET PROBLEM */
2224   if (dirichlet) {
2225     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2226     if (pcbddc->local_mat->symmetric_set) {
2227       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2228     }
2229     /* Matrix for Dirichlet problem is pcis->A_II */
2230     n_D = pcis->n - pcis->n_B;
2231     if (!pcbddc->ksp_D) { /* create object if not yet build */
2232       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
2233       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
2234       /* default */
2235       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
2236       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
2237       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2238       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2239       if (issbaij) {
2240         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2241       } else {
2242         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2243       }
2244       /* Allow user's customization */
2245       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
2246       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2247     }
2248     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
2249     if (sub_schurs->reuse_mumps) {
2250       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2251 
2252       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
2253     }
2254     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2255     if (!n_D) {
2256       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2257       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2258     }
2259     /* Set Up KSP for Dirichlet problem of BDDC */
2260     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
2261     /* set ksp_D into pcis data */
2262     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
2263     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
2264     pcis->ksp_D = pcbddc->ksp_D;
2265   }
2266 
2267   /* NEUMANN PROBLEM */
2268   A_RR = 0;
2269   if (neumann) {
2270     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2271     PetscInt        ibs,mbs;
2272     PetscBool       issbaij;
2273     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
2274     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
2275     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
2276     if (pcbddc->ksp_R) { /* already created ksp */
2277       PetscInt nn_R;
2278       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
2279       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2280       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
2281       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
2282         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
2283         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2284         reuse = MAT_INITIAL_MATRIX;
2285       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
2286         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
2287           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2288           reuse = MAT_INITIAL_MATRIX;
2289         } else { /* safe to reuse the matrix */
2290           reuse = MAT_REUSE_MATRIX;
2291         }
2292       }
2293       /* last check */
2294       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
2295         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2296         reuse = MAT_INITIAL_MATRIX;
2297       }
2298     } else { /* first time, so we need to create the matrix */
2299       reuse = MAT_INITIAL_MATRIX;
2300     }
2301     /* extract A_RR */
2302     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
2303     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
2304     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2305     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
2306       if (matis->A == pcbddc->local_mat) {
2307         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2308         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2309       } else {
2310         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2311       }
2312     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
2313       if (matis->A == pcbddc->local_mat) {
2314         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2315         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2316       } else {
2317         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2318       }
2319     }
2320     if (!sub_schurs->reuse_mumps) {
2321       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
2322       if (pcbddc->local_mat->symmetric_set) {
2323         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2324       }
2325     } else {
2326       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2327 
2328       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2329       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
2330       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2331     }
2332     if (!pcbddc->ksp_R) { /* create object if not present */
2333       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
2334       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
2335       /* default */
2336       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
2337       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
2338       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2339       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2340       if (issbaij) {
2341         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2342       } else {
2343         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2344       }
2345       /* Allow user's customization */
2346       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
2347       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2348     }
2349     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2350     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2351     if (!n_R) {
2352       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2353       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2354     }
2355     /* Reuse MUMPS solver if it is present */
2356     if (sub_schurs->reuse_mumps) {
2357       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2358 
2359       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2360     }
2361     /* Set Up KSP for Neumann problem of BDDC */
2362     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2363   }
2364   /* free Neumann problem's matrix */
2365   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2366 
2367   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2368   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2369     if (pcbddc->dbg_flag) {
2370       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2371       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2372       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2373     }
2374     if (dirichlet) { /* Dirichlet */
2375       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2376       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2377       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2378       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2379       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2380       /* need to be adapted? */
2381       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2382       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2383       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2384       /* print info */
2385       if (pcbddc->dbg_flag) {
2386         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2387         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2388       }
2389       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2390         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2391       }
2392     }
2393     if (neumann) { /* Neumann */
2394       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2395       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2396       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2397       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2398       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2399       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2400       /* need to be adapted? */
2401       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2402       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2403       /* print info */
2404       if (pcbddc->dbg_flag) {
2405         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2406         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2407       }
2408       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2409         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2410       }
2411     }
2412   }
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 #undef __FUNCT__
2417 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2418 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2419 {
2420   PetscErrorCode  ierr;
2421   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2422   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2423 
2424   PetscFunctionBegin;
2425   if (!sub_schurs->reuse_mumps) {
2426     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2427   }
2428   if (!pcbddc->switch_static) {
2429     if (applytranspose && pcbddc->local_auxmat1) {
2430       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2431       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2432     }
2433     if (!sub_schurs->reuse_mumps) {
2434       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2435       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2436     } else {
2437       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2438 
2439       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2440       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2441     }
2442   } else {
2443     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2444     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2445     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2446     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2447     if (applytranspose && pcbddc->local_auxmat1) {
2448       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2449       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2450       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2451       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2452     }
2453   }
2454   if (!sub_schurs->reuse_mumps) {
2455     if (applytranspose) {
2456       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2457     } else {
2458       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2459     }
2460 #if defined(PETSC_HAVE_MUMPS)
2461   } else {
2462     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2463 
2464     if (applytranspose) {
2465       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2466     } else {
2467       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2468     }
2469 #endif
2470   }
2471   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2472   if (!pcbddc->switch_static) {
2473     if (!sub_schurs->reuse_mumps) {
2474       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2475       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2476     } else {
2477       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2478 
2479       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2480       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2481     }
2482     if (!applytranspose && pcbddc->local_auxmat1) {
2483       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2484       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2485     }
2486   } else {
2487     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2488     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2489     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2490     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2491     if (!applytranspose && pcbddc->local_auxmat1) {
2492       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2493       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2494     }
2495     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2496     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2497     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2498     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2499   }
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2504 #undef __FUNCT__
2505 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2506 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2507 {
2508   PetscErrorCode ierr;
2509   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2510   PC_IS*            pcis = (PC_IS*)  (pc->data);
2511   const PetscScalar zero = 0.0;
2512 
2513   PetscFunctionBegin;
2514   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2515   if (applytranspose) {
2516     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2517     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2518   } else {
2519     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2520     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2521   }
2522 
2523   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2524   if (pcbddc->benign_n) {
2525     PetscScalar *array;
2526     PetscInt    j;
2527 
2528     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2529     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
2530     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2531   }
2532 
2533   /* start communications from local primal nodes to rhs of coarse solver */
2534   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2535   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2536   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2537 
2538   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2539   /* TODO remove null space when doing multilevel */
2540   if (pcbddc->coarse_ksp) {
2541     Mat coarse_mat;
2542     Vec rhs,sol;
2543     MatNullSpace nullsp;
2544 
2545     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2546     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2547     ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
2548     ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr);
2549     if (nullsp) {
2550       ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr);
2551     }
2552     if (applytranspose) {
2553       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2554     } else {
2555       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2556     }
2557     if (nullsp) {
2558       ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr);
2559     }
2560   }
2561 
2562   /* Local solution on R nodes */
2563   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2564     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2565   }
2566 
2567   /* communications from coarse sol to local primal nodes */
2568   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2569   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2570 
2571   /* Sum contributions from two levels */
2572   if (applytranspose) {
2573     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2574     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2575   } else {
2576     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2577     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2578   }
2579   /* store p0 */
2580   if (pcbddc->benign_n) {
2581     PetscScalar *array;
2582     PetscInt    j;
2583 
2584     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2585     for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
2586     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2587   }
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 #undef __FUNCT__
2592 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2593 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2594 {
2595   PetscErrorCode ierr;
2596   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2597   PetscScalar    *array;
2598   Vec            from,to;
2599 
2600   PetscFunctionBegin;
2601   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2602     from = pcbddc->coarse_vec;
2603     to = pcbddc->vec1_P;
2604     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2605       Vec tvec;
2606 
2607       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2608       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2609       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2610       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2611       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2612       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2613     }
2614   } else { /* from local to global -> put data in coarse right hand side */
2615     from = pcbddc->vec1_P;
2616     to = pcbddc->coarse_vec;
2617   }
2618   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 #undef __FUNCT__
2623 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2624 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2625 {
2626   PetscErrorCode ierr;
2627   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2628   PetscScalar    *array;
2629   Vec            from,to;
2630 
2631   PetscFunctionBegin;
2632   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2633     from = pcbddc->coarse_vec;
2634     to = pcbddc->vec1_P;
2635   } else { /* from local to global -> put data in coarse right hand side */
2636     from = pcbddc->vec1_P;
2637     to = pcbddc->coarse_vec;
2638   }
2639   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2640   if (smode == SCATTER_FORWARD) {
2641     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2642       Vec tvec;
2643 
2644       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2645       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2646       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2647       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2648     }
2649   } else {
2650     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2651      ierr = VecResetArray(from);CHKERRQ(ierr);
2652     }
2653   }
2654   PetscFunctionReturn(0);
2655 }
2656 
2657 /* uncomment for testing purposes */
2658 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2659 #undef __FUNCT__
2660 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2661 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2662 {
2663   PetscErrorCode    ierr;
2664   PC_IS*            pcis = (PC_IS*)(pc->data);
2665   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2666   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2667   /* one and zero */
2668   PetscScalar       one=1.0,zero=0.0;
2669   /* space to store constraints and their local indices */
2670   PetscScalar       *constraints_data;
2671   PetscInt          *constraints_idxs,*constraints_idxs_B;
2672   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2673   PetscInt          *constraints_n;
2674   /* iterators */
2675   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2676   /* BLAS integers */
2677   PetscBLASInt      lwork,lierr;
2678   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2679   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2680   /* reuse */
2681   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2682   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2683   /* change of basis */
2684   PetscBool         qr_needed;
2685   PetscBT           change_basis,qr_needed_idx;
2686   /* auxiliary stuff */
2687   PetscInt          *nnz,*is_indices;
2688   PetscInt          ncc;
2689   /* some quantities */
2690   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2691   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2692 
2693   PetscFunctionBegin;
2694   /* Destroy Mat objects computed previously */
2695   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2696   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2697   /* save info on constraints from previous setup (if any) */
2698   olocal_primal_size = pcbddc->local_primal_size;
2699   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2700   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2701   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2702   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2703   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2704   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2705 
2706   /* print some info */
2707   if (pcbddc->dbg_flag) {
2708     IS       vertices;
2709     PetscInt nv,nedges,nfaces;
2710     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2711     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2712     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2713     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2714     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2715     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2716     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2717     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2718     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2719   }
2720 
2721   if (!pcbddc->adaptive_selection) {
2722     IS           ISForVertices,*ISForFaces,*ISForEdges;
2723     MatNullSpace nearnullsp;
2724     const Vec    *nearnullvecs;
2725     Vec          *localnearnullsp;
2726     PetscScalar  *array;
2727     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2728     PetscBool    nnsp_has_cnst;
2729     /* LAPACK working arrays for SVD or POD */
2730     PetscBool    skip_lapack,boolforchange;
2731     PetscScalar  *work;
2732     PetscReal    *singular_vals;
2733 #if defined(PETSC_USE_COMPLEX)
2734     PetscReal    *rwork;
2735 #endif
2736 #if defined(PETSC_MISSING_LAPACK_GESVD)
2737     PetscScalar  *temp_basis,*correlation_mat;
2738 #else
2739     PetscBLASInt dummy_int=1;
2740     PetscScalar  dummy_scalar=1.;
2741 #endif
2742 
2743     /* Get index sets for faces, edges and vertices from graph */
2744     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2745     /* free unneeded index sets */
2746     if (!pcbddc->use_vertices) {
2747       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2748     }
2749     if (!pcbddc->use_edges) {
2750       for (i=0;i<n_ISForEdges;i++) {
2751         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2752       }
2753       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2754       n_ISForEdges = 0;
2755     }
2756     if (!pcbddc->use_faces) {
2757       for (i=0;i<n_ISForFaces;i++) {
2758         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2759       }
2760       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2761       n_ISForFaces = 0;
2762     }
2763 
2764 #if defined(PETSC_USE_DEBUG)
2765     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2766        Also use_change_of_basis should be consistent among processors */
2767     if (pcbddc->NullSpace) {
2768       PetscBool tbool[2],gbool[2];
2769 
2770       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2771         pcbddc->use_change_of_basis = PETSC_TRUE;
2772         if (!ISForEdges) {
2773           pcbddc->use_change_on_faces = PETSC_TRUE;
2774         }
2775       }
2776       tbool[0] = pcbddc->use_change_of_basis;
2777       tbool[1] = pcbddc->use_change_on_faces;
2778       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2779       pcbddc->use_change_of_basis = gbool[0];
2780       pcbddc->use_change_on_faces = gbool[1];
2781     }
2782 #endif
2783 
2784     /* check if near null space is attached to global mat */
2785     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2786     if (nearnullsp) {
2787       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2788       /* remove any stored info */
2789       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2790       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2791       /* store information for BDDC solver reuse */
2792       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2793       pcbddc->onearnullspace = nearnullsp;
2794       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2795       for (i=0;i<nnsp_size;i++) {
2796         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2797       }
2798     } else { /* if near null space is not provided BDDC uses constants by default */
2799       nnsp_size = 0;
2800       nnsp_has_cnst = PETSC_TRUE;
2801     }
2802     /* get max number of constraints on a single cc */
2803     max_constraints = nnsp_size;
2804     if (nnsp_has_cnst) max_constraints++;
2805 
2806     /*
2807          Evaluate maximum storage size needed by the procedure
2808          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2809          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2810          There can be multiple constraints per connected component
2811                                                                                                                                                            */
2812     n_vertices = 0;
2813     if (ISForVertices) {
2814       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2815     }
2816     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2817     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2818 
2819     total_counts = n_ISForFaces+n_ISForEdges;
2820     total_counts *= max_constraints;
2821     total_counts += n_vertices;
2822     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2823 
2824     total_counts = 0;
2825     max_size_of_constraint = 0;
2826     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2827       IS used_is;
2828       if (i<n_ISForEdges) {
2829         used_is = ISForEdges[i];
2830       } else {
2831         used_is = ISForFaces[i-n_ISForEdges];
2832       }
2833       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2834       total_counts += j;
2835       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2836     }
2837     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2838 
2839     /* get local part of global near null space vectors */
2840     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2841     for (k=0;k<nnsp_size;k++) {
2842       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2843       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2844       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2845     }
2846 
2847     /* whether or not to skip lapack calls */
2848     skip_lapack = PETSC_TRUE;
2849     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2850 
2851     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2852     if (!skip_lapack) {
2853       PetscScalar temp_work;
2854 
2855 #if defined(PETSC_MISSING_LAPACK_GESVD)
2856       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2857       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2858       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2859       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2860 #if defined(PETSC_USE_COMPLEX)
2861       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2862 #endif
2863       /* now we evaluate the optimal workspace using query with lwork=-1 */
2864       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2865       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2866       lwork = -1;
2867       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2868 #if !defined(PETSC_USE_COMPLEX)
2869       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2870 #else
2871       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2872 #endif
2873       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2874       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2875 #else /* on missing GESVD */
2876       /* SVD */
2877       PetscInt max_n,min_n;
2878       max_n = max_size_of_constraint;
2879       min_n = max_constraints;
2880       if (max_size_of_constraint < max_constraints) {
2881         min_n = max_size_of_constraint;
2882         max_n = max_constraints;
2883       }
2884       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2885 #if defined(PETSC_USE_COMPLEX)
2886       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2887 #endif
2888       /* now we evaluate the optimal workspace using query with lwork=-1 */
2889       lwork = -1;
2890       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2891       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2892       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2893       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2894 #if !defined(PETSC_USE_COMPLEX)
2895       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2896 #else
2897       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2898 #endif
2899       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2900       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2901 #endif /* on missing GESVD */
2902       /* Allocate optimal workspace */
2903       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2904       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2905     }
2906     /* Now we can loop on constraining sets */
2907     total_counts = 0;
2908     constraints_idxs_ptr[0] = 0;
2909     constraints_data_ptr[0] = 0;
2910     /* vertices */
2911     if (n_vertices) {
2912       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2913       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2914       for (i=0;i<n_vertices;i++) {
2915         constraints_n[total_counts] = 1;
2916         constraints_data[total_counts] = 1.0;
2917         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2918         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2919         total_counts++;
2920       }
2921       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2922       n_vertices = total_counts;
2923     }
2924 
2925     /* edges and faces */
2926     total_counts_cc = total_counts;
2927     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2928       IS        used_is;
2929       PetscBool idxs_copied = PETSC_FALSE;
2930 
2931       if (ncc<n_ISForEdges) {
2932         used_is = ISForEdges[ncc];
2933         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2934       } else {
2935         used_is = ISForFaces[ncc-n_ISForEdges];
2936         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2937       }
2938       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2939 
2940       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2941       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2942       /* change of basis should not be performed on local periodic nodes */
2943       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2944       if (nnsp_has_cnst) {
2945         PetscScalar quad_value;
2946 
2947         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2948         idxs_copied = PETSC_TRUE;
2949 
2950         if (!pcbddc->use_nnsp_true) {
2951           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2952         } else {
2953           quad_value = 1.0;
2954         }
2955         for (j=0;j<size_of_constraint;j++) {
2956           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2957         }
2958         temp_constraints++;
2959         total_counts++;
2960       }
2961       for (k=0;k<nnsp_size;k++) {
2962         PetscReal real_value;
2963         PetscScalar *ptr_to_data;
2964 
2965         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2966         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2967         for (j=0;j<size_of_constraint;j++) {
2968           ptr_to_data[j] = array[is_indices[j]];
2969         }
2970         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2971         /* check if array is null on the connected component */
2972         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2973         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2974         if (real_value > 0.0) { /* keep indices and values */
2975           temp_constraints++;
2976           total_counts++;
2977           if (!idxs_copied) {
2978             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2979             idxs_copied = PETSC_TRUE;
2980           }
2981         }
2982       }
2983       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2984       valid_constraints = temp_constraints;
2985       if (!pcbddc->use_nnsp_true && temp_constraints) {
2986         if (temp_constraints == 1) { /* just normalize the constraint */
2987           PetscScalar norm,*ptr_to_data;
2988 
2989           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2990           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2991           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2992           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2993           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2994         } else { /* perform SVD */
2995           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2996           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2997 
2998 #if defined(PETSC_MISSING_LAPACK_GESVD)
2999           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
3000              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
3001              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
3002                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
3003                 from that computed using LAPACKgesvd
3004              -> This is due to a different computation of eigenvectors in LAPACKheev
3005              -> The quality of the POD-computed basis will be the same */
3006           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
3007           /* Store upper triangular part of correlation matrix */
3008           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3009           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3010           for (j=0;j<temp_constraints;j++) {
3011             for (k=0;k<j+1;k++) {
3012               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
3013             }
3014           }
3015           /* compute eigenvalues and eigenvectors of correlation matrix */
3016           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3017           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
3018 #if !defined(PETSC_USE_COMPLEX)
3019           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
3020 #else
3021           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
3022 #endif
3023           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3024           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
3025           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
3026           j = 0;
3027           while (j < temp_constraints && singular_vals[j] < tol) j++;
3028           total_counts = total_counts-j;
3029           valid_constraints = temp_constraints-j;
3030           /* scale and copy POD basis into used quadrature memory */
3031           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3032           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3033           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
3034           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3035           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
3036           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3037           if (j<temp_constraints) {
3038             PetscInt ii;
3039             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
3040             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3041             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
3042             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3043             for (k=0;k<temp_constraints-j;k++) {
3044               for (ii=0;ii<size_of_constraint;ii++) {
3045                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
3046               }
3047             }
3048           }
3049 #else  /* on missing GESVD */
3050           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3051           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
3052           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3053           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3054 #if !defined(PETSC_USE_COMPLEX)
3055           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
3056 #else
3057           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
3058 #endif
3059           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
3060           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3061           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
3062           k = temp_constraints;
3063           if (k > size_of_constraint) k = size_of_constraint;
3064           j = 0;
3065           while (j < k && singular_vals[k-j-1] < tol) j++;
3066           valid_constraints = k-j;
3067           total_counts = total_counts-temp_constraints+valid_constraints;
3068 #endif /* on missing GESVD */
3069         }
3070       }
3071       /* update pointers information */
3072       if (valid_constraints) {
3073         constraints_n[total_counts_cc] = valid_constraints;
3074         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
3075         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
3076         /* set change_of_basis flag */
3077         if (boolforchange) {
3078           PetscBTSet(change_basis,total_counts_cc);
3079         }
3080         total_counts_cc++;
3081       }
3082     }
3083     /* free workspace */
3084     if (!skip_lapack) {
3085       ierr = PetscFree(work);CHKERRQ(ierr);
3086 #if defined(PETSC_USE_COMPLEX)
3087       ierr = PetscFree(rwork);CHKERRQ(ierr);
3088 #endif
3089       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
3090 #if defined(PETSC_MISSING_LAPACK_GESVD)
3091       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
3092       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
3093 #endif
3094     }
3095     for (k=0;k<nnsp_size;k++) {
3096       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
3097     }
3098     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
3099     /* free index sets of faces, edges and vertices */
3100     for (i=0;i<n_ISForFaces;i++) {
3101       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
3102     }
3103     if (n_ISForFaces) {
3104       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
3105     }
3106     for (i=0;i<n_ISForEdges;i++) {
3107       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
3108     }
3109     if (n_ISForEdges) {
3110       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
3111     }
3112     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
3113   } else {
3114     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3115 
3116     total_counts = 0;
3117     n_vertices = 0;
3118     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3119       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
3120     }
3121     max_constraints = 0;
3122     total_counts_cc = 0;
3123     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3124       total_counts += pcbddc->adaptive_constraints_n[i];
3125       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
3126       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
3127     }
3128     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
3129     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
3130     constraints_idxs = pcbddc->adaptive_constraints_idxs;
3131     constraints_data = pcbddc->adaptive_constraints_data;
3132     /* constraints_n differs from pcbddc->adaptive_constraints_n */
3133     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
3134     total_counts_cc = 0;
3135     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3136       if (pcbddc->adaptive_constraints_n[i]) {
3137         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
3138       }
3139     }
3140 #if 0
3141     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
3142     for (i=0;i<total_counts_cc;i++) {
3143       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
3144       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
3145       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
3146         printf(" %d",constraints_idxs[j]);
3147       }
3148       printf("\n");
3149       printf("number of cc: %d\n",constraints_n[i]);
3150     }
3151     for (i=0;i<n_vertices;i++) {
3152       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
3153     }
3154     for (i=0;i<sub_schurs->n_subs;i++) {
3155       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
3156     }
3157 #endif
3158 
3159     max_size_of_constraint = 0;
3160     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
3161     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
3162     /* Change of basis */
3163     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
3164     if (pcbddc->use_change_of_basis) {
3165       for (i=0;i<sub_schurs->n_subs;i++) {
3166         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
3167           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
3168         }
3169       }
3170     }
3171   }
3172   pcbddc->local_primal_size = total_counts;
3173   ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3174 
3175   /* map constraints_idxs in boundary numbering */
3176   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
3177   if (i != constraints_idxs_ptr[total_counts_cc]) {
3178     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
3179   }
3180 
3181   /* Create constraint matrix */
3182   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
3183   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
3184   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
3185 
3186   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
3187   /* determine if a QR strategy is needed for change of basis */
3188   qr_needed = PETSC_FALSE;
3189   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
3190   total_primal_vertices=0;
3191   pcbddc->local_primal_size_cc = 0;
3192   for (i=0;i<total_counts_cc;i++) {
3193     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3194     if (size_of_constraint == 1) {
3195       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
3196       pcbddc->local_primal_size_cc += 1;
3197     } else if (PetscBTLookup(change_basis,i)) {
3198       for (k=0;k<constraints_n[i];k++) {
3199         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3200       }
3201       pcbddc->local_primal_size_cc += constraints_n[i];
3202       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
3203         PetscBTSet(qr_needed_idx,i);
3204         qr_needed = PETSC_TRUE;
3205       }
3206     } else {
3207       pcbddc->local_primal_size_cc += 1;
3208     }
3209   }
3210   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
3211   pcbddc->n_vertices = total_primal_vertices;
3212   /* permute indices in order to have a sorted set of vertices */
3213   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3214 
3215   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
3216   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
3217   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
3218 
3219   /* nonzero structure of constraint matrix */
3220   /* and get reference dof for local constraints */
3221   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
3222   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
3223 
3224   j = total_primal_vertices;
3225   total_counts = total_primal_vertices;
3226   cum = total_primal_vertices;
3227   for (i=n_vertices;i<total_counts_cc;i++) {
3228     if (!PetscBTLookup(change_basis,i)) {
3229       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
3230       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
3231       cum++;
3232       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3233       for (k=0;k<constraints_n[i];k++) {
3234         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3235         nnz[j+k] = size_of_constraint;
3236       }
3237       j += constraints_n[i];
3238     }
3239   }
3240   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
3241   ierr = PetscFree(nnz);CHKERRQ(ierr);
3242 
3243   /* set values in constraint matrix */
3244   for (i=0;i<total_primal_vertices;i++) {
3245     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3246   }
3247   total_counts = total_primal_vertices;
3248   for (i=n_vertices;i<total_counts_cc;i++) {
3249     if (!PetscBTLookup(change_basis,i)) {
3250       PetscInt *cols;
3251 
3252       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3253       cols = constraints_idxs+constraints_idxs_ptr[i];
3254       for (k=0;k<constraints_n[i];k++) {
3255         PetscInt    row = total_counts+k;
3256         PetscScalar *vals;
3257 
3258         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
3259         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3260       }
3261       total_counts += constraints_n[i];
3262     }
3263   }
3264   /* assembling */
3265   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3266   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3267 
3268   /*
3269   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3270   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
3271   */
3272   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
3273   if (pcbddc->use_change_of_basis) {
3274     /* dual and primal dofs on a single cc */
3275     PetscInt     dual_dofs,primal_dofs;
3276     /* working stuff for GEQRF */
3277     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
3278     PetscBLASInt lqr_work;
3279     /* working stuff for UNGQR */
3280     PetscScalar  *gqr_work,lgqr_work_t;
3281     PetscBLASInt lgqr_work;
3282     /* working stuff for TRTRS */
3283     PetscScalar  *trs_rhs;
3284     PetscBLASInt Blas_NRHS;
3285     /* pointers for values insertion into change of basis matrix */
3286     PetscInt     *start_rows,*start_cols;
3287     PetscScalar  *start_vals;
3288     /* working stuff for values insertion */
3289     PetscBT      is_primal;
3290     PetscInt     *aux_primal_numbering_B;
3291     /* matrix sizes */
3292     PetscInt     global_size,local_size;
3293     /* temporary change of basis */
3294     Mat          localChangeOfBasisMatrix;
3295     /* extra space for debugging */
3296     PetscScalar  *dbg_work;
3297 
3298     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
3299     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
3300     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3301     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
3302     /* nonzeros for local mat */
3303     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
3304     for (i=0;i<pcis->n;i++) nnz[i]=1;
3305     for (i=n_vertices;i<total_counts_cc;i++) {
3306       if (PetscBTLookup(change_basis,i)) {
3307         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3308         if (PetscBTLookup(qr_needed_idx,i)) {
3309           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
3310         } else {
3311           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
3312           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
3313         }
3314       }
3315     }
3316     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
3317     ierr = PetscFree(nnz);CHKERRQ(ierr);
3318     /* Set initial identity in the matrix */
3319     for (i=0;i<pcis->n;i++) {
3320       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
3321     }
3322 
3323     if (pcbddc->dbg_flag) {
3324       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
3325       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3326     }
3327 
3328 
3329     /* Now we loop on the constraints which need a change of basis */
3330     /*
3331        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
3332        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
3333 
3334        Basic blocks of change of basis matrix T computed by
3335 
3336           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
3337 
3338             | 1        0   ...        0         s_1/S |
3339             | 0        1   ...        0         s_2/S |
3340             |              ...                        |
3341             | 0        ...            1     s_{n-1}/S |
3342             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
3343 
3344             with S = \sum_{i=1}^n s_i^2
3345             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
3346                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
3347 
3348           - QR decomposition of constraints otherwise
3349     */
3350     if (qr_needed) {
3351       /* space to store Q */
3352       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
3353       /* first we issue queries for optimal work */
3354       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3355       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
3356       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3357       lqr_work = -1;
3358       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
3359       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3360       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3361       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3362       lgqr_work = -1;
3363       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3364       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3365       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3366       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3367       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3368       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3369       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3370       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3371       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3372       /* array to store scaling factors for reflectors */
3373       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3374       /* array to store rhs and solution of triangular solver */
3375       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3376       /* allocating workspace for check */
3377       if (pcbddc->dbg_flag) {
3378         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3379       }
3380     }
3381     /* array to store whether a node is primal or not */
3382     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3383     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3384     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3385     if (i != total_primal_vertices) {
3386       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3387     }
3388     for (i=0;i<total_primal_vertices;i++) {
3389       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3390     }
3391     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3392 
3393     /* loop on constraints and see whether or not they need a change of basis and compute it */
3394     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3395       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3396       if (PetscBTLookup(change_basis,total_counts)) {
3397         /* get constraint info */
3398         primal_dofs = constraints_n[total_counts];
3399         dual_dofs = size_of_constraint-primal_dofs;
3400 
3401         if (pcbddc->dbg_flag) {
3402           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3403         }
3404 
3405         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3406 
3407           /* copy quadrature constraints for change of basis check */
3408           if (pcbddc->dbg_flag) {
3409             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3410           }
3411           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3412           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3413 
3414           /* compute QR decomposition of constraints */
3415           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3416           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3417           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3418           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3419           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3420           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3421           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3422 
3423           /* explictly compute R^-T */
3424           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3425           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3426           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3427           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3428           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3429           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3430           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3431           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3432           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3433           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3434 
3435           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3436           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3437           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3438           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3439           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3440           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3441           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3442           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3443           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3444 
3445           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3446              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3447              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3448           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3449           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3450           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3451           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3452           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3453           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3454           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3455           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3456           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3457           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3458 
3459           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3460           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3461           /* insert cols for primal dofs */
3462           for (j=0;j<primal_dofs;j++) {
3463             start_vals = &qr_basis[j*size_of_constraint];
3464             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3465             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3466           }
3467           /* insert cols for dual dofs */
3468           for (j=0,k=0;j<dual_dofs;k++) {
3469             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3470               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3471               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3472               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3473               j++;
3474             }
3475           }
3476 
3477           /* check change of basis */
3478           if (pcbddc->dbg_flag) {
3479             PetscInt   ii,jj;
3480             PetscBool valid_qr=PETSC_TRUE;
3481             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3482             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3483             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3484             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3485             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3486             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3487             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3488             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3489             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3490             for (jj=0;jj<size_of_constraint;jj++) {
3491               for (ii=0;ii<primal_dofs;ii++) {
3492                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3493                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3494               }
3495             }
3496             if (!valid_qr) {
3497               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3498               for (jj=0;jj<size_of_constraint;jj++) {
3499                 for (ii=0;ii<primal_dofs;ii++) {
3500                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3501                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3502                   }
3503                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3504                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3505                   }
3506                 }
3507               }
3508             } else {
3509               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3510             }
3511           }
3512         } else { /* simple transformation block */
3513           PetscInt    row,col;
3514           PetscScalar val,norm;
3515 
3516           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3517           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3518           for (j=0;j<size_of_constraint;j++) {
3519             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3520             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3521             if (!PetscBTLookup(is_primal,row_B)) {
3522               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3523               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3524               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3525             } else {
3526               for (k=0;k<size_of_constraint;k++) {
3527                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3528                 if (row != col) {
3529                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3530                 } else {
3531                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3532                 }
3533                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3534               }
3535             }
3536           }
3537           if (pcbddc->dbg_flag) {
3538             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3539           }
3540         }
3541       } else {
3542         if (pcbddc->dbg_flag) {
3543           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3544         }
3545       }
3546     }
3547 
3548     /* free workspace */
3549     if (qr_needed) {
3550       if (pcbddc->dbg_flag) {
3551         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3552       }
3553       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3554       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3555       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3556       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3557       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3558     }
3559     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3560     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3561     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3562 
3563     /* assembling of global change of variable */
3564     {
3565       Mat      tmat;
3566       PetscInt bs;
3567 
3568       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3569       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3570       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3571       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3572       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3573       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3574       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3575       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3576       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3577       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3578       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3579       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3580       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3581       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3582       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3583       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3584       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3585       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3586     }
3587     /* check */
3588     if (pcbddc->dbg_flag) {
3589       PetscReal error;
3590       Vec       x,x_change;
3591 
3592       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3593       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3594       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3595       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3596       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3597       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3598       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3599       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3600       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3601       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3602       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3603       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3604       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3605       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3606       ierr = VecDestroy(&x);CHKERRQ(ierr);
3607       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3608     }
3609 
3610     /* adapt sub_schurs computed (if any) */
3611     if (pcbddc->use_deluxe_scaling) {
3612       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3613       if (sub_schurs->S_Ej_all) {
3614         Mat                    S_new,tmat;
3615         ISLocalToGlobalMapping NtoSall;
3616         IS                     is_all_N,is_V,is_V_Sall;
3617         const PetscScalar      *array;
3618         const PetscInt         *idxs_V,*idxs_all;
3619         PetscInt               i,n_V;
3620 
3621         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3622         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3623         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3624         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3625         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3626         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3627         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3628         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3629         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3630         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3631         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3632         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3633         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3634         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3635         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3636         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3637         for (i=0;i<n_V;i++) {
3638           PetscScalar val;
3639           PetscInt    idx;
3640 
3641           idx = idxs_V[i];
3642           val = array[idxs_all[idxs_V[i]]];
3643           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3644         }
3645         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3646         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3647         sub_schurs->S_Ej_all = S_new;
3648         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3649         if (sub_schurs->sum_S_Ej_all) {
3650           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3651           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3652           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3653           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3654           sub_schurs->sum_S_Ej_all = S_new;
3655           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3656         }
3657         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3658         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3659         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3660         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3661         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3662       }
3663     }
3664     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3665   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3666     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3667     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3668   }
3669 
3670   /* set up change of basis context */
3671   if (pcbddc->ChangeOfBasisMatrix) {
3672     PCBDDCChange_ctx change_ctx;
3673 
3674     if (!pcbddc->new_global_mat) {
3675       PetscInt global_size,local_size;
3676 
3677       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3678       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3679       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3680       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3681       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3682       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3683       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3684       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3685       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3686     } else {
3687       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3688       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3689       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3690     }
3691     if (!pcbddc->user_ChangeOfBasisMatrix) {
3692       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3693       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3694     } else {
3695       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3696       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3697     }
3698     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3699     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3700     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3701     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3702   }
3703 
3704   /* add pressure dofs to set of primal nodes for numbering purposes */
3705   for (i=0;i<pcbddc->benign_n;i++) {
3706     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
3707     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
3708     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3709     pcbddc->local_primal_size_cc++;
3710     pcbddc->local_primal_size++;
3711   }
3712 
3713   /* check if a new primal space has been introduced (also take into account benign trick) */
3714   pcbddc->new_primal_space_local = PETSC_TRUE;
3715   if (olocal_primal_size == pcbddc->local_primal_size) {
3716     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3717     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3718     if (!pcbddc->new_primal_space_local) {
3719       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3720       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3721     }
3722   }
3723   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3724   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3725   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3726 
3727   /* flush dbg viewer */
3728   if (pcbddc->dbg_flag) {
3729     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3730   }
3731 
3732   /* free workspace */
3733   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3734   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3735   if (!pcbddc->adaptive_selection) {
3736     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3737     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3738   } else {
3739     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3740                       pcbddc->adaptive_constraints_idxs_ptr,
3741                       pcbddc->adaptive_constraints_data_ptr,
3742                       pcbddc->adaptive_constraints_idxs,
3743                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3744     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3745     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3746   }
3747   PetscFunctionReturn(0);
3748 }
3749 
3750 #undef __FUNCT__
3751 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3752 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3753 {
3754   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3755   PC_IS       *pcis = (PC_IS*)pc->data;
3756   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3757   PetscInt    ierr,i,vertex_size,N;
3758   PetscViewer viewer=pcbddc->dbg_viewer;
3759 
3760   PetscFunctionBegin;
3761   /* Reset previously computed graph */
3762   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3763   /* Init local Graph struct */
3764   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3765   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3766 
3767   /* Check validity of the csr graph passed in by the user */
3768   if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3769     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
3770   }
3771 
3772   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3773   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3774     PetscInt  *xadj,*adjncy;
3775     PetscInt  nvtxs;
3776     PetscBool flg_row=PETSC_FALSE;
3777 
3778     if (pcbddc->use_local_adj) {
3779 
3780       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3781       if (flg_row) {
3782         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3783         pcbddc->computed_rowadj = PETSC_TRUE;
3784       }
3785       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3786     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3787       IS                     is_dummy;
3788       ISLocalToGlobalMapping l2gmap_dummy;
3789       PetscInt               j,sum;
3790       PetscInt               *cxadj,*cadjncy;
3791       const PetscInt         *idxs;
3792       PCBDDCGraph            graph;
3793       PetscBT                is_on_boundary;
3794 
3795       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3796       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3797       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3798       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3799       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3800       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3801       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3802       if (flg_row) {
3803         graph->xadj = xadj;
3804         graph->adjncy = adjncy;
3805       }
3806       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3807       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3808       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3809 
3810       if (pcbddc->dbg_flag) {
3811         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3812         for (i=0;i<graph->ncc;i++) {
3813           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3814         }
3815       }
3816 
3817       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3818       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3819       for (i=0;i<pcis->n_B;i++) {
3820         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3821       }
3822       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3823 
3824       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3825       sum = 0;
3826       for (i=0;i<graph->ncc;i++) {
3827         PetscInt sizecc = 0;
3828         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3829           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3830             sizecc++;
3831           }
3832         }
3833         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3834           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3835             cxadj[graph->queue[j]] = sizecc;
3836           }
3837         }
3838         sum += sizecc*sizecc;
3839       }
3840       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3841       sum = 0;
3842       for (i=0;i<pcis->n;i++) {
3843         PetscInt temp = cxadj[i];
3844         cxadj[i] = sum;
3845         sum += temp;
3846       }
3847       cxadj[pcis->n] = sum;
3848       for (i=0;i<graph->ncc;i++) {
3849         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3850           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3851             PetscInt k,sizecc = 0;
3852             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3853               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3854                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3855                 sizecc++;
3856               }
3857             }
3858           }
3859         }
3860       }
3861       if (sum) {
3862         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3863       } else {
3864         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3865         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3866       }
3867       graph->xadj = 0;
3868       graph->adjncy = 0;
3869       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3870       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3871     }
3872   }
3873   if (pcbddc->dbg_flag) {
3874     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3875   }
3876 
3877   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3878   vertex_size = 1;
3879   if (pcbddc->user_provided_isfordofs) {
3880     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3881       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3882       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3883         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3884         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3885       }
3886       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3887       pcbddc->n_ISForDofs = 0;
3888       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3889     }
3890     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3891     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3892   } else {
3893     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3894       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3895       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3896       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3897         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3898       }
3899     }
3900   }
3901 
3902   /* Setup of Graph */
3903   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3904     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3905   }
3906   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3907     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3908   }
3909   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3910 
3911   /* attach info on disconnected subdomains if present */
3912   if (pcbddc->n_local_subs) {
3913     PetscInt *local_subs;
3914 
3915     ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr);
3916     for (i=0;i<pcbddc->n_local_subs;i++) {
3917       const PetscInt *idxs;
3918       PetscInt       nl,j;
3919 
3920       ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr);
3921       ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3922       for (j=0;j<nl;j++) {
3923         local_subs[idxs[j]] = i;
3924       }
3925       ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3926     }
3927     pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
3928     pcbddc->mat_graph->local_subs = local_subs;
3929   }
3930 
3931   /* Graph's connected components analysis */
3932   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3933 
3934   /* print some info to stdout */
3935   if (pcbddc->dbg_flag) {
3936     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3937   }
3938 
3939   /* mark topography has done */
3940   pcbddc->recompute_topography = PETSC_FALSE;
3941   PetscFunctionReturn(0);
3942 }
3943 
3944 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3945 #undef __FUNCT__
3946 #define __FUNCT__ "PCBDDCSubsetNumbering"
3947 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3948 {
3949   PetscSF        sf;
3950   PetscLayout    map;
3951   const PetscInt *idxs;
3952   PetscInt       *leaf_data,*root_data,*gidxs;
3953   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3954   PetscInt       n_n,nlocals,start,first_index;
3955   PetscMPIInt    commsize;
3956   PetscBool      first_found;
3957   PetscErrorCode ierr;
3958 
3959   PetscFunctionBegin;
3960   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3961   if (subset_mult) {
3962     PetscCheckSameComm(subset,1,subset_mult,2);
3963     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3964     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3965   }
3966   /* create workspace layout for computing global indices of subset */
3967   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3968   lbounds[0] = lbounds[1] = 0;
3969   for (i=0;i<n;i++) {
3970     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3971     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3972   }
3973   lbounds[0] = -lbounds[0];
3974   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3975   gbounds[0] = -gbounds[0];
3976   N = gbounds[1] - gbounds[0] + 1;
3977   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3978   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3979   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3980   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3981   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3982 
3983   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3984   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3985   if (subset_mult) {
3986     const PetscInt* idxs_mult;
3987 
3988     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3989     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3990     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3991   } else {
3992     for (i=0;i<n;i++) leaf_data[i] = 1;
3993   }
3994   /* local size of new subset */
3995   n_n = 0;
3996   for (i=0;i<n;i++) n_n += leaf_data[i];
3997 
3998   /* global indexes in layout */
3999   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
4000   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
4001   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
4002   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
4003   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
4004   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
4005 
4006   /* reduce from leaves to roots */
4007   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
4008   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4009   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
4010 
4011   /* count indexes in local part of layout */
4012   nlocals = 0;
4013   first_index = -1;
4014   first_found = PETSC_FALSE;
4015   for (i=0;i<Nl;i++) {
4016     if (!first_found && root_data[i]) {
4017       first_found = PETSC_TRUE;
4018       first_index = i;
4019     }
4020     nlocals += root_data[i];
4021   }
4022 
4023   /* cumulative of number of indexes and size of subset without holes */
4024 #if defined(PETSC_HAVE_MPI_EXSCAN)
4025   start = 0;
4026   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4027 #else
4028   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4029   start = start-nlocals;
4030 #endif
4031 
4032   if (N_n) { /* compute total size of new subset if requested */
4033     *N_n = start + nlocals;
4034     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
4035     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
4036   }
4037 
4038   /* adapt root data with cumulative */
4039   if (first_found) {
4040     PetscInt old_index;
4041 
4042     root_data[first_index] += start;
4043     old_index = first_index;
4044     for (i=first_index+1;i<Nl;i++) {
4045       if (root_data[i]) {
4046         root_data[i] += root_data[old_index];
4047         old_index = i;
4048       }
4049     }
4050   }
4051 
4052   /* from roots to leaves */
4053   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4054   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
4055   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
4056 
4057   /* create new IS with global indexes without holes */
4058   if (subset_mult) {
4059     const PetscInt* idxs_mult;
4060     PetscInt        cum;
4061 
4062     cum = 0;
4063     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4064     for (i=0;i<n;i++) {
4065       PetscInt j;
4066       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
4067     }
4068     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
4069   } else {
4070     for (i=0;i<n;i++) {
4071       gidxs[i] = leaf_data[i]-1;
4072     }
4073   }
4074   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
4075   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
4076   PetscFunctionReturn(0);
4077 }
4078 
4079 #undef __FUNCT__
4080 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
4081 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
4082 {
4083   PetscInt       i,j;
4084   PetscScalar    *alphas;
4085   PetscErrorCode ierr;
4086 
4087   PetscFunctionBegin;
4088   /* this implements stabilized Gram-Schmidt */
4089   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
4090   for (i=0;i<n;i++) {
4091     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
4092     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
4093     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
4094   }
4095   ierr = PetscFree(alphas);CHKERRQ(ierr);
4096   PetscFunctionReturn(0);
4097 }
4098 
4099 #undef __FUNCT__
4100 #define __FUNCT__ "MatISGetSubassemblingPattern"
4101 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
4102 {
4103   IS             ranks_send_to;
4104   PetscInt       n_neighs,*neighs,*n_shared,**shared;
4105   PetscMPIInt    size,rank,color;
4106   PetscInt       *xadj,*adjncy;
4107   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
4108   PetscInt       i,local_size,threshold=0;
4109   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
4110   PetscSubcomm   subcomm;
4111   PetscErrorCode ierr;
4112 
4113   PetscFunctionBegin;
4114   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
4115   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
4116   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
4117 
4118   /* Get info on mapping */
4119   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
4120   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4121 
4122   /* build local CSR graph of subdomains' connectivity */
4123   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
4124   xadj[0] = 0;
4125   xadj[1] = PetscMax(n_neighs-1,0);
4126   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
4127   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
4128 
4129   if (threshold) {
4130     PetscInt xadj_count = 0;
4131     for (i=1;i<n_neighs;i++) {
4132       if (n_shared[i] > threshold) {
4133         adjncy[xadj_count] = neighs[i];
4134         adjncy_wgt[xadj_count] = n_shared[i];
4135         xadj_count++;
4136       }
4137     }
4138     xadj[1] = xadj_count;
4139   } else {
4140     if (xadj[1]) {
4141       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
4142       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
4143     }
4144   }
4145   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4146   if (use_square) {
4147     for (i=0;i<xadj[1];i++) {
4148       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
4149     }
4150   }
4151   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4152 
4153   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
4154 
4155   /*
4156     Restrict work on active processes only.
4157   */
4158   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
4159   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
4160   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
4161   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
4162   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4163   if (color) {
4164     ierr = PetscFree(xadj);CHKERRQ(ierr);
4165     ierr = PetscFree(adjncy);CHKERRQ(ierr);
4166     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4167   } else {
4168     Mat             subdomain_adj;
4169     IS              new_ranks,new_ranks_contig;
4170     MatPartitioning partitioner;
4171     PetscInt        prank,rstart=0,rend=0;
4172     PetscInt        *is_indices,*oldranks;
4173     PetscBool       aggregate;
4174 
4175     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
4176     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
4177     prank = rank;
4178     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
4179     /*
4180     for (i=0;i<size;i++) {
4181       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
4182     }
4183     */
4184     for (i=0;i<xadj[1];i++) {
4185       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
4186     }
4187     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4188     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
4189     if (aggregate) {
4190       PetscInt    lrows,row,ncols,*cols;
4191       PetscMPIInt nrank;
4192       PetscScalar *vals;
4193 
4194       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
4195       lrows = 0;
4196       if (nrank<redprocs) {
4197         lrows = size/redprocs;
4198         if (nrank<size%redprocs) lrows++;
4199       }
4200       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
4201       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
4202       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4203       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4204       row = nrank;
4205       ncols = xadj[1]-xadj[0];
4206       cols = adjncy;
4207       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
4208       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
4209       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4210       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4211       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4212       ierr = PetscFree(xadj);CHKERRQ(ierr);
4213       ierr = PetscFree(adjncy);CHKERRQ(ierr);
4214       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4215       ierr = PetscFree(vals);CHKERRQ(ierr);
4216     } else {
4217       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
4218     }
4219     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
4220 
4221     /* Partition */
4222     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
4223     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
4224     if (use_vwgt) {
4225       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
4226       v_wgt[0] = local_size;
4227       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
4228     }
4229     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
4230     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
4231     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
4232     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
4233     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
4234 
4235     /* renumber new_ranks to avoid "holes" in new set of processors */
4236     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
4237     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
4238     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4239     if (!redprocs) {
4240       ranks_send_to_idx[0] = oldranks[is_indices[0]];
4241     } else {
4242       PetscInt    idxs[1];
4243       PetscMPIInt tag;
4244       MPI_Request *reqs;
4245 
4246       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
4247       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
4248       for (i=rstart;i<rend;i++) {
4249         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
4250       }
4251       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
4252       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4253       ierr = PetscFree(reqs);CHKERRQ(ierr);
4254       ranks_send_to_idx[0] = oldranks[idxs[0]];
4255     }
4256     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4257     /* clean up */
4258     ierr = PetscFree(oldranks);CHKERRQ(ierr);
4259     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
4260     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
4261     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
4262   }
4263   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4264 
4265   /* assemble parallel IS for sends */
4266   i = 1;
4267   if (color) i=0;
4268   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
4269   /* get back IS */
4270   *is_sends = ranks_send_to;
4271   PetscFunctionReturn(0);
4272 }
4273 
4274 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
4275 
4276 #undef __FUNCT__
4277 #define __FUNCT__ "MatISSubassemble"
4278 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
4279 {
4280   Mat                    local_mat;
4281   IS                     is_sends_internal;
4282   PetscInt               rows,cols,new_local_rows;
4283   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
4284   PetscBool              ismatis,isdense,newisdense,destroy_mat;
4285   ISLocalToGlobalMapping l2gmap;
4286   PetscInt*              l2gmap_indices;
4287   const PetscInt*        is_indices;
4288   MatType                new_local_type;
4289   /* buffers */
4290   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
4291   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
4292   PetscInt               *recv_buffer_idxs_local;
4293   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
4294   /* MPI */
4295   MPI_Comm               comm,comm_n;
4296   PetscSubcomm           subcomm;
4297   PetscMPIInt            n_sends,n_recvs,commsize;
4298   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
4299   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
4300   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
4301   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
4302   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
4303   PetscErrorCode         ierr;
4304 
4305   PetscFunctionBegin;
4306   /* TODO: add missing checks */
4307   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
4308   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
4309   PetscValidLogicalCollectiveEnum(mat,reuse,5);
4310   PetscValidLogicalCollectiveInt(mat,nis,7);
4311   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
4312   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
4313   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4314   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
4315   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
4316   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
4317   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
4318   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
4319     PetscInt mrows,mcols,mnrows,mncols;
4320     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
4321     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
4322     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
4323     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
4324     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
4325     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
4326   }
4327   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
4328   PetscValidLogicalCollectiveInt(mat,bs,0);
4329   /* prepare IS for sending if not provided */
4330   if (!is_sends) {
4331     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
4332     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
4333   } else {
4334     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
4335     is_sends_internal = is_sends;
4336   }
4337 
4338   /* get comm */
4339   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
4340 
4341   /* compute number of sends */
4342   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
4343   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
4344 
4345   /* compute number of receives */
4346   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
4347   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
4348   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
4349   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4350   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
4351   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
4352   ierr = PetscFree(iflags);CHKERRQ(ierr);
4353 
4354   /* restrict comm if requested */
4355   subcomm = 0;
4356   destroy_mat = PETSC_FALSE;
4357   if (restrict_comm) {
4358     PetscMPIInt color,subcommsize;
4359 
4360     color = 0;
4361     if (restrict_full) {
4362       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
4363     } else {
4364       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
4365     }
4366     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
4367     subcommsize = commsize - subcommsize;
4368     /* check if reuse has been requested */
4369     if (reuse == MAT_REUSE_MATRIX) {
4370       if (*mat_n) {
4371         PetscMPIInt subcommsize2;
4372         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
4373         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
4374         comm_n = PetscObjectComm((PetscObject)*mat_n);
4375       } else {
4376         comm_n = PETSC_COMM_SELF;
4377       }
4378     } else { /* MAT_INITIAL_MATRIX */
4379       PetscMPIInt rank;
4380 
4381       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4382       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4383       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4384       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4385       comm_n = PetscSubcommChild(subcomm);
4386     }
4387     /* flag to destroy *mat_n if not significative */
4388     if (color) destroy_mat = PETSC_TRUE;
4389   } else {
4390     comm_n = comm;
4391   }
4392 
4393   /* prepare send/receive buffers */
4394   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4395   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4396   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4397   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4398   if (nis) {
4399     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4400   }
4401 
4402   /* Get data from local matrices */
4403   if (!isdense) {
4404     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4405     /* TODO: See below some guidelines on how to prepare the local buffers */
4406     /*
4407        send_buffer_vals should contain the raw values of the local matrix
4408        send_buffer_idxs should contain:
4409        - MatType_PRIVATE type
4410        - PetscInt        size_of_l2gmap
4411        - PetscInt        global_row_indices[size_of_l2gmap]
4412        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4413     */
4414   } else {
4415     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4416     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4417     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4418     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4419     send_buffer_idxs[1] = i;
4420     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4421     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4422     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4423     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4424     for (i=0;i<n_sends;i++) {
4425       ilengths_vals[is_indices[i]] = len*len;
4426       ilengths_idxs[is_indices[i]] = len+2;
4427     }
4428   }
4429   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4430   /* additional is (if any) */
4431   if (nis) {
4432     PetscMPIInt psum;
4433     PetscInt j;
4434     for (j=0,psum=0;j<nis;j++) {
4435       PetscInt plen;
4436       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4437       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4438       psum += len+1; /* indices + lenght */
4439     }
4440     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4441     for (j=0,psum=0;j<nis;j++) {
4442       PetscInt plen;
4443       const PetscInt *is_array_idxs;
4444       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4445       send_buffer_idxs_is[psum] = plen;
4446       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4447       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4448       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4449       psum += plen+1; /* indices + lenght */
4450     }
4451     for (i=0;i<n_sends;i++) {
4452       ilengths_idxs_is[is_indices[i]] = psum;
4453     }
4454     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4455   }
4456 
4457   buf_size_idxs = 0;
4458   buf_size_vals = 0;
4459   buf_size_idxs_is = 0;
4460   for (i=0;i<n_recvs;i++) {
4461     buf_size_idxs += (PetscInt)olengths_idxs[i];
4462     buf_size_vals += (PetscInt)olengths_vals[i];
4463     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4464   }
4465   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4466   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4467   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4468 
4469   /* get new tags for clean communications */
4470   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4471   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4472   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4473 
4474   /* allocate for requests */
4475   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4476   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4477   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4478   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4479   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4480   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4481 
4482   /* communications */
4483   ptr_idxs = recv_buffer_idxs;
4484   ptr_vals = recv_buffer_vals;
4485   ptr_idxs_is = recv_buffer_idxs_is;
4486   for (i=0;i<n_recvs;i++) {
4487     source_dest = onodes[i];
4488     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4489     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4490     ptr_idxs += olengths_idxs[i];
4491     ptr_vals += olengths_vals[i];
4492     if (nis) {
4493       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4494       ptr_idxs_is += olengths_idxs_is[i];
4495     }
4496   }
4497   for (i=0;i<n_sends;i++) {
4498     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4499     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4500     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4501     if (nis) {
4502       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4503     }
4504   }
4505   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4506   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4507 
4508   /* assemble new l2g map */
4509   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4510   ptr_idxs = recv_buffer_idxs;
4511   new_local_rows = 0;
4512   for (i=0;i<n_recvs;i++) {
4513     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4514     ptr_idxs += olengths_idxs[i];
4515   }
4516   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4517   ptr_idxs = recv_buffer_idxs;
4518   new_local_rows = 0;
4519   for (i=0;i<n_recvs;i++) {
4520     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4521     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4522     ptr_idxs += olengths_idxs[i];
4523   }
4524   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4525   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4526   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4527 
4528   /* infer new local matrix type from received local matrices type */
4529   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4530   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4531   if (n_recvs) {
4532     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4533     ptr_idxs = recv_buffer_idxs;
4534     for (i=0;i<n_recvs;i++) {
4535       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4536         new_local_type_private = MATAIJ_PRIVATE;
4537         break;
4538       }
4539       ptr_idxs += olengths_idxs[i];
4540     }
4541     switch (new_local_type_private) {
4542       case MATDENSE_PRIVATE:
4543         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4544           new_local_type = MATSEQAIJ;
4545           bs = 1;
4546         } else { /* if I receive only 1 dense matrix */
4547           new_local_type = MATSEQDENSE;
4548           bs = 1;
4549         }
4550         break;
4551       case MATAIJ_PRIVATE:
4552         new_local_type = MATSEQAIJ;
4553         bs = 1;
4554         break;
4555       case MATBAIJ_PRIVATE:
4556         new_local_type = MATSEQBAIJ;
4557         break;
4558       case MATSBAIJ_PRIVATE:
4559         new_local_type = MATSEQSBAIJ;
4560         break;
4561       default:
4562         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4563         break;
4564     }
4565   } else { /* by default, new_local_type is seqdense */
4566     new_local_type = MATSEQDENSE;
4567     bs = 1;
4568   }
4569 
4570   /* create MATIS object if needed */
4571   if (reuse == MAT_INITIAL_MATRIX) {
4572     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4573     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4574   } else {
4575     /* it also destroys the local matrices */
4576     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4577   }
4578   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4579   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4580 
4581   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4582 
4583   /* Global to local map of received indices */
4584   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4585   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4586   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4587 
4588   /* restore attributes -> type of incoming data and its size */
4589   buf_size_idxs = 0;
4590   for (i=0;i<n_recvs;i++) {
4591     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4592     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4593     buf_size_idxs += (PetscInt)olengths_idxs[i];
4594   }
4595   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4596 
4597   /* set preallocation */
4598   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4599   if (!newisdense) {
4600     PetscInt *new_local_nnz=0;
4601 
4602     ptr_vals = recv_buffer_vals;
4603     ptr_idxs = recv_buffer_idxs_local;
4604     if (n_recvs) {
4605       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4606     }
4607     for (i=0;i<n_recvs;i++) {
4608       PetscInt j;
4609       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4610         for (j=0;j<*(ptr_idxs+1);j++) {
4611           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4612         }
4613       } else {
4614         /* TODO */
4615       }
4616       ptr_idxs += olengths_idxs[i];
4617     }
4618     if (new_local_nnz) {
4619       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4620       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4621       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4622       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4623       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4624       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4625     } else {
4626       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4627     }
4628     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4629   } else {
4630     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4631   }
4632 
4633   /* set values */
4634   ptr_vals = recv_buffer_vals;
4635   ptr_idxs = recv_buffer_idxs_local;
4636   for (i=0;i<n_recvs;i++) {
4637     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4638       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4639       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4640       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4641       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4642       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4643     } else {
4644       /* TODO */
4645     }
4646     ptr_idxs += olengths_idxs[i];
4647     ptr_vals += olengths_vals[i];
4648   }
4649   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4650   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4651   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4652   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4653   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4654   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4655 
4656 #if 0
4657   if (!restrict_comm) { /* check */
4658     Vec       lvec,rvec;
4659     PetscReal infty_error;
4660 
4661     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4662     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4663     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4664     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4665     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4666     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4667     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4668     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4669     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4670   }
4671 #endif
4672 
4673   /* assemble new additional is (if any) */
4674   if (nis) {
4675     PetscInt **temp_idxs,*count_is,j,psum;
4676 
4677     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4678     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4679     ptr_idxs = recv_buffer_idxs_is;
4680     psum = 0;
4681     for (i=0;i<n_recvs;i++) {
4682       for (j=0;j<nis;j++) {
4683         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4684         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4685         psum += plen;
4686         ptr_idxs += plen+1; /* shift pointer to received data */
4687       }
4688     }
4689     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4690     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4691     for (i=1;i<nis;i++) {
4692       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4693     }
4694     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4695     ptr_idxs = recv_buffer_idxs_is;
4696     for (i=0;i<n_recvs;i++) {
4697       for (j=0;j<nis;j++) {
4698         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4699         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4700         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4701         ptr_idxs += plen+1; /* shift pointer to received data */
4702       }
4703     }
4704     for (i=0;i<nis;i++) {
4705       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4706       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4707       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4708     }
4709     ierr = PetscFree(count_is);CHKERRQ(ierr);
4710     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4711     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4712   }
4713   /* free workspace */
4714   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4715   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4716   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4717   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4718   if (isdense) {
4719     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4720     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4721   } else {
4722     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4723   }
4724   if (nis) {
4725     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4726     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4727   }
4728   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4729   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4730   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4731   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4732   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4733   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4734   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4735   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4736   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4737   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4738   ierr = PetscFree(onodes);CHKERRQ(ierr);
4739   if (nis) {
4740     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4741     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4742     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4743   }
4744   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4745   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4746     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4747     for (i=0;i<nis;i++) {
4748       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4749     }
4750     *mat_n = NULL;
4751   }
4752   PetscFunctionReturn(0);
4753 }
4754 
4755 /* temporary hack into ksp private data structure */
4756 #include <petsc/private/kspimpl.h>
4757 
4758 #undef __FUNCT__
4759 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4760 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4761 {
4762   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4763   PC_IS                  *pcis = (PC_IS*)pc->data;
4764   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4765   MatNullSpace           CoarseNullSpace=NULL;
4766   ISLocalToGlobalMapping coarse_islg;
4767   IS                     coarse_is,*isarray;
4768   PetscInt               i,im_active=-1,active_procs=-1;
4769   PetscInt               nis,nisdofs,nisneu;
4770   PC                     pc_temp;
4771   PCType                 coarse_pc_type;
4772   KSPType                coarse_ksp_type;
4773   PetscBool              multilevel_requested,multilevel_allowed;
4774   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4775   Mat                    t_coarse_mat_is;
4776   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4777   PetscMPIInt            all_procs;
4778   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4779   PetscBool              compute_vecs = PETSC_FALSE;
4780   PetscScalar            *array;
4781   PetscErrorCode         ierr;
4782 
4783   PetscFunctionBegin;
4784   /* Assign global numbering to coarse dofs */
4785   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4786     PetscInt ocoarse_size;
4787     compute_vecs = PETSC_TRUE;
4788     ocoarse_size = pcbddc->coarse_size;
4789     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4790     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4791     /* see if we can avoid some work */
4792     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4793       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4794       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4795         PC        pc;
4796         PetscBool isbddc;
4797 
4798         /* temporary workaround since PCBDDC does not have a reset method so far */
4799         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4800         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4801         if (isbddc) {
4802           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4803         }
4804         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4805         coarse_reuse = PETSC_FALSE;
4806       } else { /* we can safely reuse already computed coarse matrix */
4807         coarse_reuse = PETSC_TRUE;
4808       }
4809     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4810       coarse_reuse = PETSC_FALSE;
4811     }
4812     /* reset any subassembling information */
4813     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4814     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4815   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4816     coarse_reuse = PETSC_TRUE;
4817   }
4818 
4819   /* count "active" (i.e. with positive local size) and "void" processes */
4820   im_active = !!(pcis->n);
4821   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4822   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4823   void_procs = all_procs-active_procs;
4824   csin_type_simple = PETSC_TRUE;
4825   redist = PETSC_FALSE;
4826   if (pcbddc->current_level && void_procs) {
4827     csin_ml = PETSC_TRUE;
4828     ncoarse_ml = void_procs;
4829     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4830     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4831       csin_ds = PETSC_TRUE;
4832       ncoarse_ds = pcbddc->redistribute_coarse;
4833       redist = PETSC_TRUE;
4834     } else {
4835       csin_ds = PETSC_TRUE;
4836       ncoarse_ds = active_procs;
4837       redist = PETSC_TRUE;
4838     }
4839   } else {
4840     csin_ml = PETSC_FALSE;
4841     ncoarse_ml = all_procs;
4842     if (void_procs) {
4843       csin_ds = PETSC_TRUE;
4844       ncoarse_ds = void_procs;
4845       csin_type_simple = PETSC_FALSE;
4846     } else {
4847       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4848         csin_ds = PETSC_TRUE;
4849         ncoarse_ds = pcbddc->redistribute_coarse;
4850         redist = PETSC_TRUE;
4851       } else {
4852         csin_ds = PETSC_FALSE;
4853         ncoarse_ds = all_procs;
4854       }
4855     }
4856   }
4857 
4858   /*
4859     test if we can go multilevel: three conditions must be satisfied:
4860     - we have not exceeded the number of levels requested
4861     - we can actually subassemble the active processes
4862     - we can find a suitable number of MPI processes where we can place the subassembled problem
4863   */
4864   multilevel_allowed = PETSC_FALSE;
4865   multilevel_requested = PETSC_FALSE;
4866   if (pcbddc->current_level < pcbddc->max_levels) {
4867     multilevel_requested = PETSC_TRUE;
4868     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4869       multilevel_allowed = PETSC_FALSE;
4870     } else {
4871       multilevel_allowed = PETSC_TRUE;
4872     }
4873   }
4874   /* determine number of process partecipating to coarse solver */
4875   if (multilevel_allowed) {
4876     ncoarse = ncoarse_ml;
4877     csin = csin_ml;
4878     redist = PETSC_FALSE;
4879   } else {
4880     ncoarse = ncoarse_ds;
4881     csin = csin_ds;
4882   }
4883 
4884   /* creates temporary l2gmap and IS for coarse indexes */
4885   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4886   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4887 
4888   /* creates temporary MATIS object for coarse matrix */
4889   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4890   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4891   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4892   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4893 #if 0
4894   {
4895     PetscViewer viewer;
4896     char filename[256];
4897     sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4898     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4899     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4900     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4901     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4902   }
4903 #endif
4904   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4905   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4906   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4907   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4908   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4909 
4910   /* compute dofs splitting and neumann boundaries for coarse dofs */
4911   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4912     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4913     const PetscInt         *idxs;
4914     ISLocalToGlobalMapping tmap;
4915 
4916     /* create map between primal indices (in local representative ordering) and local primal numbering */
4917     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4918     /* allocate space for temporary storage */
4919     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4920     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4921     /* allocate for IS array */
4922     nisdofs = pcbddc->n_ISForDofsLocal;
4923     nisneu = !!pcbddc->NeumannBoundariesLocal;
4924     nis = nisdofs + nisneu;
4925     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4926     /* dofs splitting */
4927     for (i=0;i<nisdofs;i++) {
4928       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4929       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4930       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4931       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4932       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4933       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4934       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4935       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4936     }
4937     /* neumann boundaries */
4938     if (pcbddc->NeumannBoundariesLocal) {
4939       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4940       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4941       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4942       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4943       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4944       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4945       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4946       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4947     }
4948     /* free memory */
4949     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4950     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4951     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4952   } else {
4953     nis = 0;
4954     nisdofs = 0;
4955     nisneu = 0;
4956     isarray = NULL;
4957   }
4958   /* destroy no longer needed map */
4959   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4960 
4961   /* restrict on coarse candidates (if needed) */
4962   coarse_mat_is = NULL;
4963   if (csin) {
4964     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4965       if (redist) {
4966         PetscMPIInt rank;
4967         PetscInt    spc,n_spc_p1,dest[1],destsize;
4968 
4969         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4970         spc = active_procs/ncoarse;
4971         n_spc_p1 = active_procs%ncoarse;
4972         if (im_active) {
4973           destsize = 1;
4974           if (rank > n_spc_p1*(spc+1)-1) {
4975             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4976           } else {
4977             dest[0] = rank/(spc+1);
4978           }
4979         } else {
4980           destsize = 0;
4981         }
4982         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4983       } else if (csin_type_simple) {
4984         PetscMPIInt rank;
4985         PetscInt    issize,isidx;
4986 
4987         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4988         if (im_active) {
4989           issize = 1;
4990           isidx = (PetscInt)rank;
4991         } else {
4992           issize = 0;
4993           isidx = -1;
4994         }
4995         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4996       } else { /* get a suitable subassembling pattern from MATIS code */
4997         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4998       }
4999 
5000       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
5001       if (!redist || ncoarse <= void_procs) {
5002         PetscInt ncoarse_cand,tissize,*nisindices;
5003         PetscInt *coarse_candidates;
5004         const PetscInt* tisindices;
5005 
5006         /* get coarse candidates' ranks in pc communicator */
5007         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
5008         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5009         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
5010           if (!coarse_candidates[i]) {
5011             coarse_candidates[ncoarse_cand++]=i;
5012           }
5013         }
5014         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
5015 
5016 
5017         if (pcbddc->dbg_flag) {
5018           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5019           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
5020           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5021           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
5022           for (i=0;i<ncoarse_cand;i++) {
5023             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
5024           }
5025           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
5026           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5027         }
5028         /* shift the pattern on coarse candidates */
5029         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
5030         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5031         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
5032         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
5033         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
5034         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
5035         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
5036       }
5037       if (pcbddc->dbg_flag) {
5038         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5039         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
5040         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
5041         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5042       }
5043     }
5044     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
5045     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
5046       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5047     } else { /* this is the last level, so use just receiving processes in subcomm */
5048       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
5049     }
5050   } else {
5051     if (pcbddc->dbg_flag) {
5052       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5053       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
5054       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5055     }
5056     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
5057     coarse_mat_is = t_coarse_mat_is;
5058   }
5059 
5060   /* create local to global scatters for coarse problem */
5061   if (compute_vecs) {
5062     PetscInt lrows;
5063     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
5064     if (coarse_mat_is) {
5065       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
5066     } else {
5067       lrows = 0;
5068     }
5069     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
5070     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
5071     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
5072     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5073     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
5074   }
5075   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
5076   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
5077 
5078   /* set defaults for coarse KSP and PC */
5079   if (multilevel_allowed) {
5080     coarse_ksp_type = KSPRICHARDSON;
5081     coarse_pc_type = PCBDDC;
5082   } else {
5083     coarse_ksp_type = KSPPREONLY;
5084     coarse_pc_type = PCREDUNDANT;
5085   }
5086 
5087   /* print some info if requested */
5088   if (pcbddc->dbg_flag) {
5089     if (!multilevel_allowed) {
5090       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5091       if (multilevel_requested) {
5092         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5093       } else if (pcbddc->max_levels) {
5094         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
5095       }
5096       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5097     }
5098   }
5099 
5100   /* create the coarse KSP object only once with defaults */
5101   if (coarse_mat_is) {
5102     MatReuse coarse_mat_reuse;
5103     PetscViewer dbg_viewer = NULL;
5104     if (pcbddc->dbg_flag) {
5105       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
5106       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5107     }
5108     if (!pcbddc->coarse_ksp) {
5109       char prefix[256],str_level[16];
5110       size_t len;
5111       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
5112       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5113       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
5114       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
5115       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
5116       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
5117       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
5118       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5119       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5120       /* prefix */
5121       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
5122       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
5123       if (!pcbddc->current_level) {
5124         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
5125         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
5126       } else {
5127         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
5128         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5129         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5130         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
5131         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
5132         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
5133       }
5134       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
5135       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5136       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
5137       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5138       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
5139       /* allow user customization */
5140       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
5141     }
5142     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5143     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5144     if (nisdofs) {
5145       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
5146       for (i=0;i<nisdofs;i++) {
5147         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
5148       }
5149     }
5150     if (nisneu) {
5151       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
5152       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
5153     }
5154 
5155     /* get some info after set from options */
5156     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
5157     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
5158     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
5159     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
5160       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5161       isbddc = PETSC_FALSE;
5162     }
5163     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
5164     if (isredundant) {
5165       KSP inner_ksp;
5166       PC  inner_pc;
5167       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
5168       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
5169       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
5170     }
5171 
5172     /* assemble coarse matrix */
5173     if (coarse_reuse) {
5174       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
5175       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
5176       coarse_mat_reuse = MAT_REUSE_MATRIX;
5177     } else {
5178       coarse_mat_reuse = MAT_INITIAL_MATRIX;
5179     }
5180     if (isbddc || isnn) {
5181       if (pcbddc->coarsening_ratio > 1) {
5182         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
5183           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
5184           if (pcbddc->dbg_flag) {
5185             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5186             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
5187             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
5188             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5189           }
5190         }
5191         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
5192       } else {
5193         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
5194         coarse_mat = coarse_mat_is;
5195       }
5196     } else {
5197       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
5198     }
5199     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
5200 
5201     /* propagate symmetry info of coarse matrix */
5202     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
5203     if (pc->pmat->symmetric_set) {
5204       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
5205     }
5206     if (pc->pmat->hermitian_set) {
5207       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
5208     }
5209     if (pc->pmat->spd_set) {
5210       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
5211     }
5212     /* set operators */
5213     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5214     if (pcbddc->dbg_flag) {
5215       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5216     }
5217   } else { /* processes non partecipating to coarse solver (if any) */
5218     coarse_mat = 0;
5219   }
5220   ierr = PetscFree(isarray);CHKERRQ(ierr);
5221 #if 0
5222   {
5223     PetscViewer viewer;
5224     char filename[256];
5225     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
5226     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
5227     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
5228     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
5229     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
5230   }
5231 #endif
5232 
5233   /* Compute coarse null space (special handling by BDDC only) */
5234 #if 0
5235   if (pcbddc->NullSpace) {
5236     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
5237   }
5238 #endif
5239   /* hack */
5240   if (pcbddc->coarse_ksp) {
5241     Vec crhs,csol;
5242 
5243     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
5244     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
5245     if (!csol) {
5246       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
5247     }
5248     if (!crhs) {
5249       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
5250     }
5251   }
5252 
5253   /* compute null space for coarse solver if the benign trick has been requested */
5254   if (pcbddc->benign_null) {
5255 
5256     ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr);
5257     for (i=0;i<pcbddc->benign_n;i++) {
5258       ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr);
5259     }
5260     ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
5261     ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
5262     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5263     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5264     if (coarse_mat) {
5265       Vec         nullv;
5266       PetscScalar *array,*array2;
5267       PetscInt    nl;
5268 
5269       ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr);
5270       ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr);
5271       ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5272       ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr);
5273       ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr);
5274       ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr);
5275       ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5276       ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr);
5277       ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr);
5278       ierr = VecDestroy(&nullv);CHKERRQ(ierr);
5279     }
5280   }
5281 
5282   if (pcbddc->coarse_ksp) {
5283     PetscBool ispreonly;
5284 
5285     if (CoarseNullSpace) {
5286       PetscBool isnull;
5287       ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr);
5288       if (isnull) {
5289         if (isbddc) {
5290           ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
5291         } else {
5292           ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
5293         }
5294       } else {
5295         ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5296       }
5297     }
5298     /* setup coarse ksp */
5299     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
5300     /* Check coarse problem if in debug mode or if solving with an iterative method */
5301     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
5302     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
5303       KSP       check_ksp;
5304       KSPType   check_ksp_type;
5305       PC        check_pc;
5306       Vec       check_vec,coarse_vec;
5307       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
5308       PetscInt  its;
5309       PetscBool compute_eigs;
5310       PetscReal *eigs_r,*eigs_c;
5311       PetscInt  neigs;
5312       const char *prefix;
5313 
5314       /* Create ksp object suitable for estimation of extreme eigenvalues */
5315       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
5316       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5317       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5318       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
5319       if (ispreonly) {
5320         check_ksp_type = KSPPREONLY;
5321         compute_eigs = PETSC_FALSE;
5322       } else {
5323         check_ksp_type = KSPGMRES;
5324         compute_eigs = PETSC_TRUE;
5325       }
5326       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
5327       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
5328       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
5329       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
5330       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
5331       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
5332       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
5333       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
5334       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
5335       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
5336       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
5337       /* create random vec */
5338       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
5339       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
5340       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
5341       if (CoarseNullSpace) {
5342         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
5343       }
5344       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5345       /* solve coarse problem */
5346       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
5347       if (CoarseNullSpace) {
5348         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
5349       }
5350       /* set eigenvalue estimation if preonly has not been requested */
5351       if (compute_eigs) {
5352         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
5353         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
5354         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
5355         lambda_max = eigs_r[neigs-1];
5356         lambda_min = eigs_r[0];
5357         if (pcbddc->use_coarse_estimates) {
5358           if (lambda_max>lambda_min) {
5359             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
5360             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
5361           }
5362         }
5363       }
5364 
5365       /* check coarse problem residual error */
5366       if (pcbddc->dbg_flag) {
5367         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
5368         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5369         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
5370         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
5371         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5372         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
5373         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
5374         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
5375         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
5376         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
5377         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
5378         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
5379         if (CoarseNullSpace) {
5380           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr);
5381         }
5382         if (compute_eigs) {
5383           PetscReal lambda_max_s,lambda_min_s;
5384           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
5385           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
5386           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
5387           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
5388           for (i=0;i<neigs;i++) {
5389             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
5390           }
5391         }
5392         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5393         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5394       }
5395       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
5396       if (compute_eigs) {
5397         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
5398         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
5399       }
5400     }
5401   }
5402   /* print additional info */
5403   if (pcbddc->dbg_flag) {
5404     /* waits until all processes reaches this point */
5405     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
5406     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
5407     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5408   }
5409 
5410   /* free memory */
5411   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5412   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
5413   PetscFunctionReturn(0);
5414 }
5415 
5416 #undef __FUNCT__
5417 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
5418 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
5419 {
5420   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5421   PC_IS*         pcis = (PC_IS*)pc->data;
5422   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5423   IS             subset,subset_mult,subset_n;
5424   PetscInt       local_size,coarse_size=0;
5425   PetscInt       *local_primal_indices=NULL;
5426   const PetscInt *t_local_primal_indices;
5427   PetscErrorCode ierr;
5428 
5429   PetscFunctionBegin;
5430   /* Compute global number of coarse dofs */
5431   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5432     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5433   }
5434   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5435   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5436   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5437   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5438   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5439   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5440   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5441   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5442   if (local_size != pcbddc->local_primal_size) {
5443     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5444   }
5445   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5446   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5447   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5448   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5449   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5450 
5451   /* check numbering */
5452   if (pcbddc->dbg_flag) {
5453     PetscScalar coarsesum,*array,*array2;
5454     PetscInt    i;
5455     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5456 
5457     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5458     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5459     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5460     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
5461     /* counter */
5462     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5463     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5464     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5465     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5466     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5467     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5468 
5469     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5470     for (i=0;i<pcbddc->local_primal_size;i++) {
5471       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5472     }
5473     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5474     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5475     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5476     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5477     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5478     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5479     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5480     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5481     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5482     for (i=0;i<pcis->n;i++) {
5483       if (array[i] != 0.0 && array[i] != array2[i]) {
5484         PetscInt owned = (PetscInt)PetscRealPart(array[i]);
5485         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
5486         set_error = PETSC_TRUE;
5487         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5488       }
5489     }
5490     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5491     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5492     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5493     for (i=0;i<pcis->n;i++) {
5494       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5495     }
5496     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5497     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5498     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5499     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5500     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5501     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5502     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5503       PetscInt *gidxs;
5504 
5505       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5506       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5507       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5508       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5509       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5510       for (i=0;i<pcbddc->local_primal_size;i++) {
5511         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5512       }
5513       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5514       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5515     }
5516     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5517     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5518   }
5519   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5520   /* get back data */
5521   *coarse_size_n = coarse_size;
5522   *local_primal_indices_n = local_primal_indices;
5523   PetscFunctionReturn(0);
5524 }
5525 
5526 #undef __FUNCT__
5527 #define __FUNCT__ "PCBDDCGlobalToLocal"
5528 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5529 {
5530   IS             localis_t;
5531   PetscInt       i,lsize,*idxs,n;
5532   PetscScalar    *vals;
5533   PetscErrorCode ierr;
5534 
5535   PetscFunctionBegin;
5536   /* get indices in local ordering exploiting local to global map */
5537   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5538   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5539   for (i=0;i<lsize;i++) vals[i] = 1.0;
5540   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5541   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5542   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5543   if (idxs) { /* multilevel guard */
5544     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5545   }
5546   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5547   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5548   ierr = PetscFree(vals);CHKERRQ(ierr);
5549   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5550   /* now compute set in local ordering */
5551   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5552   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5553   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5554   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5555   for (i=0,lsize=0;i<n;i++) {
5556     if (PetscRealPart(vals[i]) > 0.5) {
5557       lsize++;
5558     }
5559   }
5560   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5561   for (i=0,lsize=0;i<n;i++) {
5562     if (PetscRealPart(vals[i]) > 0.5) {
5563       idxs[lsize++] = i;
5564     }
5565   }
5566   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5567   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5568   *localis = localis_t;
5569   PetscFunctionReturn(0);
5570 }
5571 
5572 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5573 #undef __FUNCT__
5574 #define __FUNCT__ "PCBDDCMatMult_Private"
5575 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5576 {
5577   PCBDDCChange_ctx change_ctx;
5578   PetscErrorCode   ierr;
5579 
5580   PetscFunctionBegin;
5581   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5582   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5583   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5584   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5585   PetscFunctionReturn(0);
5586 }
5587 
5588 #undef __FUNCT__
5589 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5590 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5591 {
5592   PCBDDCChange_ctx change_ctx;
5593   PetscErrorCode   ierr;
5594 
5595   PetscFunctionBegin;
5596   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5597   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5598   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5599   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5600   PetscFunctionReturn(0);
5601 }
5602 
5603 #undef __FUNCT__
5604 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5605 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5606 {
5607   PC_IS               *pcis=(PC_IS*)pc->data;
5608   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5609   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5610   Mat                 S_j;
5611   PetscInt            *used_xadj,*used_adjncy;
5612   PetscBool           free_used_adj;
5613   PetscErrorCode      ierr;
5614 
5615   PetscFunctionBegin;
5616   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5617   free_used_adj = PETSC_FALSE;
5618   if (pcbddc->sub_schurs_layers == -1) {
5619     used_xadj = NULL;
5620     used_adjncy = NULL;
5621   } else {
5622     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5623       used_xadj = pcbddc->mat_graph->xadj;
5624       used_adjncy = pcbddc->mat_graph->adjncy;
5625     } else if (pcbddc->computed_rowadj) {
5626       used_xadj = pcbddc->mat_graph->xadj;
5627       used_adjncy = pcbddc->mat_graph->adjncy;
5628     } else {
5629       PetscBool      flg_row=PETSC_FALSE;
5630       const PetscInt *xadj,*adjncy;
5631       PetscInt       nvtxs;
5632 
5633       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5634       if (flg_row) {
5635         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5636         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5637         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5638         free_used_adj = PETSC_TRUE;
5639       } else {
5640         pcbddc->sub_schurs_layers = -1;
5641         used_xadj = NULL;
5642         used_adjncy = NULL;
5643       }
5644       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5645     }
5646   }
5647 
5648   /* setup sub_schurs data */
5649   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5650   if (!sub_schurs->use_mumps) {
5651     /* pcbddc->ksp_D up to date only if not using MUMPS */
5652     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5653     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5654   } else {
5655     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5656     PetscBool isseqaij;
5657     if (!pcbddc->use_vertices && reuse_solvers) {
5658       PetscInt n_vertices;
5659 
5660       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5661       reuse_solvers = (PetscBool)!n_vertices;
5662     }
5663     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5664     if (!isseqaij) {
5665       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5666       if (matis->A == pcbddc->local_mat) {
5667         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5668         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5669       } else {
5670         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5671       }
5672     }
5673     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5674   }
5675   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5676 
5677   /* free adjacency */
5678   if (free_used_adj) {
5679     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5680   }
5681   PetscFunctionReturn(0);
5682 }
5683 
5684 #undef __FUNCT__
5685 #define __FUNCT__ "PCBDDCInitSubSchurs"
5686 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5687 {
5688   PC_IS               *pcis=(PC_IS*)pc->data;
5689   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5690   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5691   PCBDDCGraph         graph;
5692   PetscErrorCode      ierr;
5693 
5694   PetscFunctionBegin;
5695   /* attach interface graph for determining subsets */
5696   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5697     IS       verticesIS,verticescomm;
5698     PetscInt vsize,*idxs;
5699 
5700     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5701     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5702     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5703     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5704     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5705     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5706     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5707     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5708     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5709     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5710     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5711 /*
5712     if (pcbddc->dbg_flag) {
5713       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5714     }
5715 */
5716   } else {
5717     graph = pcbddc->mat_graph;
5718   }
5719 
5720   /* sub_schurs init */
5721   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5722 
5723   /* free graph struct */
5724   if (pcbddc->sub_schurs_rebuild) {
5725     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5726   }
5727   PetscFunctionReturn(0);
5728 }
5729 
5730 #undef __FUNCT__
5731 #define __FUNCT__ "PCBDDCCheckOperator"
5732 PetscErrorCode PCBDDCCheckOperator(PC pc)
5733 {
5734   PC_IS               *pcis=(PC_IS*)pc->data;
5735   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5736   PetscErrorCode      ierr;
5737 
5738   PetscFunctionBegin;
5739   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5740     IS             zerodiag = NULL;
5741     Mat            S_j,B0_B=NULL;
5742     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5743     PetscScalar    *p0_check,*array,*array2;
5744     PetscReal      norm;
5745     PetscInt       i;
5746 
5747     /* B0 and B0_B */
5748     if (zerodiag) {
5749       IS       dummy;
5750 
5751       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
5752       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5753       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5754       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5755     }
5756     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5757     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5758     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5759     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5760     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5761     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5762     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5763     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5764     /* S_j */
5765     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5766     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5767 
5768     /* mimic vector in \widetilde{W}_\Gamma */
5769     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5770     /* continuous in primal space */
5771     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5772     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5773     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5774     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5775     ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr);
5776     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
5777     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5778     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5779     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5780     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5781     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5782     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5783     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5784     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5785 
5786     /* assemble rhs for coarse problem */
5787     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5788     /* local with Schur */
5789     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5790     if (zerodiag) {
5791       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5792       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
5793       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5794       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5795     }
5796     /* sum on primal nodes the local contributions */
5797     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5798     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5799     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5800     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5801     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5802     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5803     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5804     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5805     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5806     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5807     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5808     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5809     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5810     /* scale primal nodes (BDDC sums contibutions) */
5811     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5812     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5813     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5814     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5815     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5816     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5817     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5818     /* global: \widetilde{B0}_B w_\Gamma */
5819     if (zerodiag) {
5820       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5821       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5822       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
5823       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5824     }
5825     /* BDDC */
5826     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5827     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5828 
5829     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5830     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5831     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5832     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5833     for (i=0;i<pcbddc->benign_n;i++) {
5834       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
5835     }
5836     ierr = PetscFree(p0_check);CHKERRQ(ierr);
5837     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5838     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5839     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5840     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5841     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5842   }
5843   PetscFunctionReturn(0);
5844 }
5845