xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision ba14f8e366a051a4047525494bb330cf24b3e577)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatDetectDisconnectedComponents"
10 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[])
11 {
12   Mat                    B;
13   IS                     is_dummy,*cc_n;
14   ISLocalToGlobalMapping l2gmap_dummy;
15   PCBDDCGraph            graph;
16   PetscInt               i,n;
17   PetscInt               *xadj,*adjncy;
18   PetscInt               *xadj_filtered,*adjncy_filtered;
19   PetscBool              flg_row,isseqaij;
20   PetscErrorCode         ierr;
21 
22   PetscFunctionBegin;
23   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
24   if (!isseqaij && filter) {
25     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
26   } else {
27     B = A;
28   }
29   ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
30 
31   /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
32   if (filter) {
33     PetscScalar *data;
34     PetscInt    j,cum;
35 
36     ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr);
37     ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr);
38     cum = 0;
39     for (i=0;i<n;i++) {
40       PetscInt t;
41 
42       for (j=xadj[i];j<xadj[i+1];j++) {
43         if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
44           continue;
45         }
46         adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
47       }
48       t = xadj_filtered[i];
49       xadj_filtered[i] = cum;
50       cum += t;
51     }
52     ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr);
53   } else {
54     xadj_filtered = NULL;
55     adjncy_filtered = NULL;
56   }
57 
58   /* compute local connected components using PCBDDCGraph */
59   ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr);
60   ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
61   ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
62   ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
63   ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr);
64   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
65   if (xadj_filtered) {
66     graph->xadj = xadj_filtered;
67     graph->adjncy = adjncy_filtered;
68   } else {
69     graph->xadj = xadj;
70     graph->adjncy = adjncy;
71   }
72   ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
73   ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
74 
75   /* partial clean up */
76   ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr);
77   ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
78   if (!isseqaij && filter) {
79     ierr = MatDestroy(&B);CHKERRQ(ierr);
80   }
81 
82   /* get back data */
83   *ncc = graph->ncc;
84   ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr);
85   for (i=0;i<graph->ncc;i++) {
86     ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr);
87   }
88   *cc = cc_n;
89 
90   /* clean up graph */
91   graph->xadj = 0;
92   graph->adjncy = 0;
93   ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNCT__
98 #define __FUNCT__ "PCBDDCBenignCheck"
99 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
100 {
101   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
102   PC_IS*         pcis = (PC_IS*)(pc->data);
103   IS             dirIS = NULL;
104   PetscInt       i;
105   PetscErrorCode ierr;
106 
107   PetscFunctionBegin;
108   ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
109   if (zerodiag) {
110     Mat            A;
111     Vec            vec3_N;
112     PetscScalar    *vals;
113     const PetscInt *idxs;
114     PetscInt       nz;
115 
116     /* p0 */
117     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
118     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
119     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
120     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
121     for (i=0;i<nz;i++) vals[i] = 1.;
122     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
123     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
124     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
125     /* v_I */
126     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
127     for (i=0;i<nz;i++) vals[i] = 0.;
128     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
130     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
131     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
132     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
133     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
134     if (dirIS) {
135       PetscInt n;
136 
137       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
138       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
139       for (i=0;i<n;i++) vals[i] = 0.;
140       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
141       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
142     }
143     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
144     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
145     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
146     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
147     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
148     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
149     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
150     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) {
151       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
152     }
153     ierr = PetscFree(vals);CHKERRQ(ierr);
154     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
155   }
156   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
157 
158   /* check PCBDDCBenignGetOrSetP0 */
159   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
160   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
161   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
162   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
163   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
164   for (i=0;i<pcbddc->benign_n;i++) {
165     if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) {
166       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr);
167     }
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
174 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
175 {
176   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
177   IS             pressures,zerodiag,*zerodiag_subs;
178   PetscInt       nz,n;
179   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
184   ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr);
185   pcbddc->benign_original_mat = pcbddc->local_mat;
186   /* if a local info on dofs is present, assumes the last field is represented by "pressures"
187      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
188      Checks if all the pressure dofs in each subdomain have a zero diagonal
189      If not, a change of basis on pressures is not needed
190      since the local Schur complements are SPD
191   */
192   has_null_pressures = PETSC_TRUE;
193   have_null = PETSC_TRUE;
194   if (pcbddc->n_ISForDofsLocal) {
195     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
196 
197     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
198     ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr);
199     ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
200     ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr);
201     ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
202     ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr);
203     if (!sorted) {
204       ierr = ISSort(pressures);CHKERRQ(ierr);
205     }
206   } else {
207     pressures = NULL;
208   }
209   ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr);
210   /* TODO: add check for shared dofs and raise error */
211   ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr);
212   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
213   if (!sorted) {
214     ierr = ISSort(zerodiag);CHKERRQ(ierr);
215   }
216   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
217   if (!nz) {
218     if (n) have_null = PETSC_FALSE;
219     has_null_pressures = PETSC_FALSE;
220     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
221   }
222   recompute_zerodiag = PETSC_FALSE;
223   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
224   zerodiag_subs = NULL;
225   pcbddc->benign_n = 0;
226   if (has_null_pressures) {
227     IS       *subs;
228     PetscInt nsubs,i;
229 
230     subs = pcbddc->local_subs;
231     nsubs = pcbddc->n_local_subs;
232     if (nsubs > 1) {
233       ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr);
234       for (i=0;i<nsubs;i++) {
235         ISLocalToGlobalMapping l2g;
236         IS                     t_zerodiag_subs;
237         PetscInt               nl;
238 
239         ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr);
240         ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr);
241         ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr);
242         if (nl) {
243           PetscBool valid = PETSC_TRUE;
244 
245           if (pressures) {
246             IS t_pressure_subs;
247             ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr);
248             ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr);
249             ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr);
250           }
251           if (valid) {
252             ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr);
253             pcbddc->benign_n++;
254           } else {
255             recompute_zerodiag = PETSC_TRUE;
256           }
257         }
258         ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr);
259         ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr);
260       }
261     } else { /* there's just one subdomain (or zero if they have not been detected */
262       PetscBool valid = PETSC_TRUE;
263       if (pressures) {
264         ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr);
265       }
266       if (valid) {
267         pcbddc->benign_n = 1;
268         ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr);
269         ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr);
270         zerodiag_subs[0] = zerodiag;
271       }
272     }
273   }
274 
275   if (!pcbddc->benign_n) {
276     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
277     recompute_zerodiag = PETSC_FALSE;
278     has_null_pressures = PETSC_FALSE;
279     have_null = PETSC_FALSE;
280   }
281 
282   /* final check for null pressures */
283   if (zerodiag && pressures) {
284     PetscInt nz,np;
285     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
286     ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr);
287     if (nz != np) have_null = PETSC_FALSE;
288   }
289 
290   if (recompute_zerodiag) {
291     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
292     if (pcbddc->benign_n == 1) {
293       ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr);
294       zerodiag = zerodiag_subs[0];
295     } else {
296       PetscInt i,nzn,*new_idxs;
297 
298       nzn = 0;
299       for (i=0;i<pcbddc->benign_n;i++) {
300         PetscInt ns;
301         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
302         nzn += ns;
303       }
304       ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr);
305       nzn = 0;
306       for (i=0;i<pcbddc->benign_n;i++) {
307         PetscInt ns,*idxs;
308         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
309         ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
310         ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr);
311         ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
312         nzn += ns;
313       }
314       ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr);
315       ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr);
316     }
317     have_null = PETSC_FALSE;
318   }
319 
320   if (has_null_pressures) {
321     IS             zerodiagc;
322     const PetscInt *idxs,*idxsc;
323     PetscInt       i,s,*nnz;
324 
325     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
326     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
327     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
328     /* local change of basis for pressures */
329     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
330     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr);
331     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
332     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
333     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
334     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
335     for (i=0;i<pcbddc->benign_n;i++) {
336       PetscInt nzs,j;
337 
338       ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr);
339       ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
340       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
341       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
342       ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
343     }
344     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
345     ierr = PetscFree(nnz);CHKERRQ(ierr);
346     /* set identity on velocities */
347     for (i=0;i<n-nz;i++) {
348       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
349     }
350     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
351     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
352     ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr);
353     /* set change on pressures */
354     for (s=0;s<pcbddc->benign_n;s++) {
355       PetscScalar *array;
356       PetscInt    nzs;
357 
358       ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr);
359       ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
360       for (i=0;i<nzs-1;i++) {
361         PetscScalar vals[2];
362         PetscInt    cols[2];
363 
364         cols[0] = idxs[i];
365         cols[1] = idxs[nzs-1];
366         vals[0] = 1.;
367         vals[1] = 1.;
368         ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
369       }
370       ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr);
371       for (i=0;i<nzs-1;i++) array[i] = -1.;
372       array[nzs-1] = 1.;
373       ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
374       /* store local idxs for p0 */
375       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
376       ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
377       ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr);
378       ierr = PetscFree(array);CHKERRQ(ierr);
379     }
380     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
381     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
382     /* TODO: need optimization? */
383     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
384     ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
385     /* store global idxs for p0 */
386     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
387   }
388   ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr);
389   ierr = ISDestroy(&pressures);CHKERRQ(ierr);
390 
391   /* determines if the coarse solver will be singular or not */
392   ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
393   *zerodiaglocal = zerodiag;
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
399 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
400 {
401   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   if (!pcbddc->benign_sf) {
406     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
407     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
408   }
409   if (get) { /* use SF to get values */
410     PetscScalar *array;
411 
412     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
413     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
414     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
415     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
416   } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */
417     ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
418     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
419     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
420   }
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
426 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
427 {
428   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
429   PetscErrorCode ierr;
430 
431   PetscFunctionBegin;
432   /* TODO: add error checking
433     - avoid nested pop (or push) calls.
434     - cannot push before pop.
435     - cannot call this if pcbddc->local_mat is NULL
436   */
437   if (!pcbddc->benign_n) {
438     PetscFunctionReturn(0);
439   }
440   if (pop) {
441     IS       is_p0;
442     MatReuse reuse;
443 
444     /* extract B_0 */
445     reuse = MAT_INITIAL_MATRIX;
446     if (pcbddc->benign_B0) {
447       reuse = MAT_REUSE_MATRIX;
448     }
449     ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr);
450     ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr);
451     /* remove rows and cols from local problem */
452     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
453     ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr);
454     ierr = ISDestroy(&is_p0);CHKERRQ(ierr);
455   } else { /* push */
456     PetscInt i;
457 
458     for (i=0;i<pcbddc->benign_n;i++) {
459       PetscScalar *B0_vals;
460       PetscInt    *B0_cols,B0_ncol;
461 
462       ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
463       ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
464       ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
465       ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr);
466       ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
467     }
468     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
469     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470   }
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "PCBDDCAdaptiveSelection"
476 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
477 {
478   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
479   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
480   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
481   PetscBLASInt    *B_iwork,*B_ifail;
482   PetscScalar     *work,lwork;
483   PetscScalar     *St,*S,*eigv;
484   PetscScalar     *Sarray,*Starray;
485   PetscReal       *eigs,thresh;
486   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
487   PetscBool       allocated_S_St;
488 #if defined(PETSC_USE_COMPLEX)
489   PetscReal       *rwork;
490 #endif
491   PetscErrorCode  ierr;
492 
493   PetscFunctionBegin;
494   if (!sub_schurs->use_mumps) {
495     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
496   }
497 
498   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
499     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
500   }
501 
502   if (pcbddc->dbg_flag) {
503     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
504     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
505     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
506     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
507   }
508 
509   if (pcbddc->dbg_flag) {
510     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
511   }
512 
513   /* max size of subsets */
514   mss = 0;
515   for (i=0;i<sub_schurs->n_subs;i++) {
516     PetscInt subset_size;
517 
518     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
519     mss = PetscMax(mss,subset_size);
520   }
521 
522   /* min/max and threshold */
523   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
524   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
525   nmax = PetscMax(nmin,nmax);
526   allocated_S_St = PETSC_FALSE;
527   if (nmin) {
528     allocated_S_St = PETSC_TRUE;
529   }
530 
531   /* allocate lapack workspace */
532   cum = cum2 = 0;
533   maxneigs = 0;
534   for (i=0;i<sub_schurs->n_subs;i++) {
535     PetscInt n,subset_size;
536 
537     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
538     n = PetscMin(subset_size,nmax);
539     cum += subset_size;
540     cum2 += subset_size*n;
541     maxneigs = PetscMax(maxneigs,n);
542   }
543   if (mss) {
544     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
545       PetscBLASInt B_itype = 1;
546       PetscBLASInt B_N = mss;
547       PetscReal    zero = 0.0;
548       PetscReal    eps = 0.0; /* dlamch? */
549 
550       B_lwork = -1;
551       S = NULL;
552       St = NULL;
553       eigs = NULL;
554       eigv = NULL;
555       B_iwork = NULL;
556       B_ifail = NULL;
557 #if defined(PETSC_USE_COMPLEX)
558       rwork = NULL;
559 #endif
560       thresh = 1.0;
561       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
562 #if defined(PETSC_USE_COMPLEX)
563       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
564 #else
565       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
566 #endif
567       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
568       ierr = PetscFPTrapPop();CHKERRQ(ierr);
569     } else {
570         /* TODO */
571     }
572   } else {
573     lwork = 0;
574   }
575 
576   nv = 0;
577   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
578     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
579   }
580   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
581   if (allocated_S_St) {
582     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
583   }
584   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
585 #if defined(PETSC_USE_COMPLEX)
586   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
587 #endif
588   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
589                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
590                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
591                       nv+cum,&pcbddc->adaptive_constraints_idxs,
592                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
593   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
594 
595   maxneigs = 0;
596   cum = cum2 = cumarray = 0;
597   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
598   pcbddc->adaptive_constraints_data_ptr[0] = 0;
599   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
600     const PetscInt *idxs;
601 
602     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
603     for (cum=0;cum<nv;cum++) {
604       pcbddc->adaptive_constraints_n[cum] = 1;
605       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
606       pcbddc->adaptive_constraints_data[cum] = 1.0;
607       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
608       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
609     }
610     cum2 = cum;
611     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
612   }
613 
614   if (mss) { /* multilevel */
615     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
616     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
617   }
618 
619   for (i=0;i<sub_schurs->n_subs;i++) {
620 
621     const PetscInt *idxs;
622     PetscReal      infty = PETSC_MAX_REAL;
623     PetscInt       j,subset_size,eigs_start = 0;
624     PetscBLASInt   B_N;
625     PetscBool      same_data = PETSC_FALSE;
626 
627     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
628     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
629     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
630       if (sub_schurs->is_hermitian) {
631         PetscInt j,k;
632         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
633           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
634           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
635         }
636         for (j=0;j<subset_size;j++) {
637           for (k=j;k<subset_size;k++) {
638             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
639             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
640           }
641         }
642       } else {
643         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
644         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
645       }
646     } else {
647       S = Sarray + cumarray;
648       St = Starray + cumarray;
649     }
650 
651     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
652     /* see if we can save some work */
653     if (sub_schurs->n_subs == 1) {
654       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
655     }
656 
657     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
658       B_neigs = 0;
659     } else {
660       /* Threshold: this is an heuristic for edges */
661       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
662 
663       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
664         PetscBLASInt B_itype = 1;
665         PetscBLASInt B_IL, B_IU;
666         PetscReal    eps = -1.0; /* dlamch? */
667         PetscInt     nmin_s;
668 
669         if (pcbddc->dbg_flag) {
670           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
671         }
672 
673         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
674         if (thresh > 1.+PETSC_SMALL) {
675 
676           /* ask for eigenvalues larger than thresh */
677 #if defined(PETSC_USE_COMPLEX)
678           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
679 #else
680           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
681 #endif
682         } else {
683           B_IU = PetscMax(1,PetscMin(B_N,nmax));
684           B_IL = 1;
685 #if defined(PETSC_USE_COMPLEX)
686           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
687 #else
688           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
689 #endif
690         }
691         ierr = PetscFPTrapPop();CHKERRQ(ierr);
692         if (B_ierr) {
693           if (B_ierr < 0 ) {
694             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
695           } else if (B_ierr <= B_N) {
696             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
697           } else {
698             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
699           }
700         }
701 
702         if (B_neigs > nmax) {
703           if (pcbddc->dbg_flag) {
704             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
705           }
706           eigs_start = B_neigs -nmax;
707           B_neigs = nmax;
708         }
709 
710         nmin_s = PetscMin(nmin,B_N);
711         if (B_neigs < nmin_s) {
712           PetscBLASInt B_neigs2;
713 
714           B_IU = B_N - B_neigs;
715           B_IL = B_N - nmin_s + 1;
716           if (pcbddc->dbg_flag) {
717             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
718           }
719           if (sub_schurs->is_hermitian) {
720             PetscInt j;
721             for (j=0;j<subset_size;j++) {
722               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
723             }
724             for (j=0;j<subset_size;j++) {
725               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
726             }
727           } else {
728             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
729             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
730           }
731           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
732 #if defined(PETSC_USE_COMPLEX)
733           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
734 #else
735           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
736 #endif
737           ierr = PetscFPTrapPop();CHKERRQ(ierr);
738           B_neigs += B_neigs2;
739         }
740         if (B_ierr) {
741           if (B_ierr < 0 ) {
742             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
743           } else if (B_ierr <= B_N) {
744             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
745           } else {
746             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
747           }
748         }
749         if (pcbddc->dbg_flag) {
750           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
751           for (j=0;j<B_neigs;j++) {
752             if (eigs[j] == 0.0) {
753               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
754             } else {
755               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
756             }
757           }
758         }
759       } else {
760           /* TODO */
761       }
762     }
763     maxneigs = PetscMax(B_neigs,maxneigs);
764     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
765     if (B_neigs) {
766       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
767 
768       if (pcbddc->dbg_flag > 1) {
769         PetscInt ii;
770         for (ii=0;ii<B_neigs;ii++) {
771           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
772           for (j=0;j<B_N;j++) {
773 #if defined(PETSC_USE_COMPLEX)
774             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
775             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
776             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
777 #else
778             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
779 #endif
780           }
781         }
782       }
783 #if 0
784       for (j=0;j<B_neigs;j++) {
785         PetscBLASInt Blas_N,Blas_one = 1.0;
786         PetscScalar norm;
787         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
788         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
789                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
790         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
791           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
792         } else {
793           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
794         }
795         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
796       }
797 #endif
798       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
799       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
800       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
801       cum++;
802     }
803     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
804     /* shift for next computation */
805     cumarray += subset_size*subset_size;
806   }
807   if (pcbddc->dbg_flag) {
808     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
809   }
810 
811   if (mss) {
812     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
813     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
814     /* destroy matrices (junk) */
815     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
816     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
817   }
818   if (allocated_S_St) {
819     ierr = PetscFree2(S,St);CHKERRQ(ierr);
820   }
821   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
822 #if defined(PETSC_USE_COMPLEX)
823   ierr = PetscFree(rwork);CHKERRQ(ierr);
824 #endif
825   if (pcbddc->dbg_flag) {
826     PetscInt maxneigs_r;
827     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
828     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
829   }
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "PCBDDCSetUpSolvers"
835 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
836 {
837   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
838   PetscScalar    *coarse_submat_vals;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* Setup local scatters R_to_B and (optionally) R_to_D */
843   /* PCBDDCSetUpLocalWorkVectors should be called first! */
844   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
845 
846   /* Setup local neumann solver ksp_R */
847   /* PCBDDCSetUpLocalScatters should be called first! */
848   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
849 
850   /* Change global null space passed in by the user if change of basis has been requested */
851   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
852     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
853   }
854 
855   /*
856      Setup local correction and local part of coarse basis.
857      Gives back the dense local part of the coarse matrix in column major ordering
858   */
859   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
860 
861   /* Compute total number of coarse nodes and setup coarse solver */
862   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
863 
864   /* free */
865   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "PCBDDCResetCustomization"
871 PetscErrorCode PCBDDCResetCustomization(PC pc)
872 {
873   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
878   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
879   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
880   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
881   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
882   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
883   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
884   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
885   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
886   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
887   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
888   PetscFunctionReturn(0);
889 }
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "PCBDDCResetTopography"
893 PetscErrorCode PCBDDCResetTopography(PC pc)
894 {
895   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
896   PetscInt       i;
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
901   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
902   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
903   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
904   for (i=0;i<pcbddc->n_local_subs;i++) {
905     ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr);
906   }
907   ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr);
908   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
911 
912 #undef __FUNCT__
913 #define __FUNCT__ "PCBDDCResetSolvers"
914 PetscErrorCode PCBDDCResetSolvers(PC pc)
915 {
916   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
917   PetscScalar    *array;
918   PetscErrorCode ierr;
919 
920   PetscFunctionBegin;
921   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
922   if (pcbddc->coarse_phi_B) {
923     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
924     ierr = PetscFree(array);CHKERRQ(ierr);
925   }
926   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
927   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
928   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
929   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
930   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
931   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
932   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
933   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
934   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
935   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
936   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
937   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
938   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
939   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
940   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
941   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
942   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
943   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
944   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
945   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
946   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
947   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
948   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
949   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
950   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
951   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
952   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
953   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
954   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
955   ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 #undef __FUNCT__
960 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
961 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
962 {
963   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
964   PC_IS          *pcis = (PC_IS*)pc->data;
965   VecType        impVecType;
966   PetscInt       n_constraints,n_R,old_size;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   if (!pcbddc->ConstraintMatrix) {
971     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
972   }
973   /* get sizes */
974   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
975   n_R = pcis->n - pcbddc->n_vertices;
976   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
977   /* local work vectors (try to avoid unneeded work)*/
978   /* R nodes */
979   old_size = -1;
980   if (pcbddc->vec1_R) {
981     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
982   }
983   if (n_R != old_size) {
984     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
985     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
986     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
987     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
988     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
989     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
990   }
991   /* local primal dofs */
992   old_size = -1;
993   if (pcbddc->vec1_P) {
994     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
995   }
996   if (pcbddc->local_primal_size != old_size) {
997     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
998     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
999     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
1000     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
1001   }
1002   /* local explicit constraints */
1003   old_size = -1;
1004   if (pcbddc->vec1_C) {
1005     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
1006   }
1007   if (n_constraints && n_constraints != old_size) {
1008     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1009     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
1010     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
1011     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
1012   }
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "PCBDDCSetUpCorrection"
1018 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
1019 {
1020   PetscErrorCode  ierr;
1021   /* pointers to pcis and pcbddc */
1022   PC_IS*          pcis = (PC_IS*)pc->data;
1023   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1024   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1025   /* submatrices of local problem */
1026   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
1027   /* submatrices of benign trick */
1028   Mat             B0_V = NULL;
1029   /* submatrices of local coarse problem */
1030   Mat             S_VV,S_CV,S_VC,S_CC;
1031   /* working matrices */
1032   Mat             C_CR;
1033   /* additional working stuff */
1034   PC              pc_R;
1035   Mat             F;
1036   PetscBool       isLU,isCHOL,isILU;
1037 
1038   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
1039   PetscScalar     *work;
1040   PetscInt        *idx_V_B;
1041   PetscInt        n,n_vertices,n_constraints,*p0_lidx_I;
1042   PetscInt        i,n_R,n_D,n_B;
1043   PetscBool       unsymmetric_check;
1044   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
1045   MatType         impMatType;
1046   /* some shortcuts to scalars */
1047   PetscScalar     one=1.0,m_one=-1.0;
1048 
1049   PetscFunctionBegin;
1050   n_vertices = pcbddc->n_vertices;
1051   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
1052   /* Set Non-overlapping dimensions */
1053   n_B = pcis->n_B;
1054   n_D = pcis->n - n_B;
1055   n_R = pcis->n - n_vertices;
1056 
1057   /* Set types for local objects needed by BDDC precondtioner */
1058   impMatType = MATSEQDENSE;
1059 
1060   /* vertices in boundary numbering */
1061   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
1062   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
1063   if (i != n_vertices) {
1064     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
1065   }
1066 
1067   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
1068   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
1069   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
1070   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
1071   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
1072   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
1073   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
1074   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
1075   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
1076   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
1077 
1078   unsymmetric_check = PETSC_FALSE;
1079   /* allocate workspace */
1080   n = 0;
1081   if (n_constraints) {
1082     n += n_R*n_constraints;
1083   }
1084   if (n_vertices) {
1085     n = PetscMax(2*n_R*n_vertices,n);
1086     n = PetscMax((n_R+n_B)*n_vertices,n);
1087   }
1088   if (!pcbddc->symmetric_primal) {
1089     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
1090     unsymmetric_check = PETSC_TRUE;
1091   }
1092   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1093 
1094   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
1095   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
1096   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
1097   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
1098   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
1099   if (isLU || isILU || isCHOL) {
1100     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
1101   } else if (sub_schurs->reuse_mumps) {
1102     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1103     MatFactorType type;
1104 
1105     F = reuse_mumps->F;
1106     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
1107     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
1108   } else {
1109     F = NULL;
1110   }
1111 
1112   /* Precompute stuffs needed for preprocessing and application of BDDC*/
1113   if (n_constraints) {
1114     Mat         M1,M2,M3;
1115     Mat         auxmat;
1116     IS          is_aux;
1117     PetscScalar *array,*array2;
1118 
1119     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1120     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1121 
1122     /* Extract constraints on R nodes: C_{CR}  */
1123     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
1124     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
1125     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
1126 
1127     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
1128     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
1129     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1130     for (i=0;i<n_constraints;i++) {
1131       const PetscScalar *row_cmat_values;
1132       const PetscInt    *row_cmat_indices;
1133       PetscInt          size_of_constraint,j;
1134 
1135       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1136       for (j=0;j<size_of_constraint;j++) {
1137         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
1138       }
1139       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1140     }
1141     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
1142     if (F) {
1143       Mat B;
1144 
1145       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1146       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
1147       ierr = MatDestroy(&B);CHKERRQ(ierr);
1148     } else {
1149       PetscScalar *marr;
1150 
1151       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1152       for (i=0;i<n_constraints;i++) {
1153         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1154         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
1155         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1156         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1157         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1158       }
1159       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1160     }
1161     if (!pcbddc->switch_static) {
1162       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1163       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1164       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1165       for (i=0;i<n_constraints;i++) {
1166         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
1167         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
1168         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1170         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1171         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1172       }
1173       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1174       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1175       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1176     } else {
1177       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
1178       pcbddc->local_auxmat2 = local_auxmat2_R;
1179       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1180     }
1181     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1182     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
1183     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
1184     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
1185     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
1186     if (isCHOL) {
1187       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
1188     } else {
1189       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
1190     }
1191     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
1192     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
1193     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
1194     ierr = MatDestroy(&M2);CHKERRQ(ierr);
1195     ierr = MatDestroy(&M3);CHKERRQ(ierr);
1196     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
1197     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
1198     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
1199     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
1200     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1201   }
1202   /* Get submatrices from subdomain matrix */
1203   if (pcbddc->benign_n) {
1204     IS        dummy;
1205     Mat       B0_R;
1206     PetscReal norm;
1207 
1208     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1209     ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
1210     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
1211     if (norm > PETSC_SMALL) {
1212       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
1213     }
1214     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1215     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1216   }
1217 
1218   if (n_vertices) {
1219     IS is_aux;
1220 
1221     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
1222       IS tis;
1223 
1224       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
1225       ierr = ISSort(tis);CHKERRQ(ierr);
1226       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
1227       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1228     } else {
1229       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
1230     }
1231     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
1232     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
1233     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
1234     if (pcbddc->benign_n) {
1235       IS dummy;
1236 
1237       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1238       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1239       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1240     }
1241     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1242   }
1243 
1244   /* Matrix of coarse basis functions (local) */
1245   if (pcbddc->coarse_phi_B) {
1246     PetscInt on_B,on_primal,on_D=n_D;
1247     if (pcbddc->coarse_phi_D) {
1248       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1249     }
1250     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1251     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1252       PetscScalar *marray;
1253 
1254       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1255       ierr = PetscFree(marray);CHKERRQ(ierr);
1256       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1257       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1258       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1259       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1260     }
1261   }
1262 
1263   if (!pcbddc->coarse_phi_B) {
1264     PetscScalar *marray;
1265 
1266     n = n_B*pcbddc->local_primal_size;
1267     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1268       n += n_D*pcbddc->local_primal_size;
1269     }
1270     if (!pcbddc->symmetric_primal) {
1271       n *= 2;
1272     }
1273     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1274     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1275     n = n_B*pcbddc->local_primal_size;
1276     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1277       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1278       n += n_D*pcbddc->local_primal_size;
1279     }
1280     if (!pcbddc->symmetric_primal) {
1281       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1282       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1283         n = n_B*pcbddc->local_primal_size;
1284         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1285       }
1286     } else {
1287       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1288       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1289       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1290         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1291         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1292       }
1293     }
1294   }
1295 
1296   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1297   p0_lidx_I = NULL;
1298   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1299     const PetscInt *idxs;
1300 
1301     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1302     ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr);
1303     for (i=0;i<pcbddc->benign_n;i++) {
1304       ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr);
1305     }
1306     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1307   }
1308 
1309   /* vertices */
1310   if (n_vertices) {
1311 
1312     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1313 
1314     if (n_R) {
1315       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1316       PetscBLASInt B_N,B_one = 1;
1317       PetscScalar  *x,*y;
1318       PetscBool    isseqaij;
1319 
1320       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1321       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1322       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1323       if (F) { /* TODO could be optimized for symmetric problems */
1324         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1325       } else {
1326         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1327         for (i=0;i<n_vertices;i++) {
1328           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
1329           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1330           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1331           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1332           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1333         }
1334         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1335       }
1336       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1337       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
1338       if (n_constraints) {
1339         Mat B;
1340 
1341         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1342         for (i=0;i<n_vertices;i++) {
1343           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1344           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
1345           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1346           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1347           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1348           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1349         }
1350         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1351         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1352         ierr = MatDestroy(&B);CHKERRQ(ierr);
1353         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1354         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1355         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1356         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
1357         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
1358         ierr = MatDestroy(&B);CHKERRQ(ierr);
1359       }
1360       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1361       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1362         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1363       }
1364       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1365       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1366       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1367       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1368       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1369       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1370       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1371       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1372       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1373       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1374     } else {
1375       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1376     }
1377     for (i=0;i<pcbddc->benign_n;i++) {
1378       const PetscScalar *vals;
1379       const PetscInt    *idxs;
1380       PetscInt          n,j,primal_idx;
1381 
1382       ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1383       primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i;
1384       for (j=0;j<n;j++) {
1385         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j];
1386         coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j];
1387       }
1388       ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1389     }
1390     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1391 
1392     /* coarse basis functions */
1393     for (i=0;i<n_vertices;i++) {
1394       PetscScalar *y;
1395 
1396       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1397       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1398       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1399       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1400       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1401       y[n_B*i+idx_V_B[i]] = 1.0;
1402       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1403       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1404 
1405       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1406         PetscInt j;
1407 
1408         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1409         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1410         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1411         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1412         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1413         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1414         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1415       }
1416       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1417     }
1418     /* if n_R == 0 the object is not destroyed */
1419     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1420   }
1421 
1422   if (n_constraints) {
1423     Mat B;
1424 
1425     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1426     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1427     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1428     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1429     if (n_vertices) {
1430       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1431         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1432       } else {
1433         Mat S_VCt;
1434 
1435         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1436         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1437         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1438       }
1439     }
1440     ierr = MatDestroy(&B);CHKERRQ(ierr);
1441     /* coarse basis functions */
1442     for (i=0;i<n_constraints;i++) {
1443       PetscScalar *y;
1444 
1445       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1446       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1447       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1448       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1449       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1450       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1451       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1452       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1453         PetscInt j;
1454 
1455         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1456         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1457         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1458         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1459         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1460         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1461         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1462       }
1463       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1464     }
1465   }
1466   if (n_constraints) {
1467     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1468   }
1469   ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr);
1470   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1471 
1472   /* compute other basis functions for non-symmetric problems */
1473   if (!pcbddc->symmetric_primal) {
1474 
1475     if (n_constraints) {
1476       Mat S_CCT,B_C;
1477 
1478       /* this is a lazy thing */
1479       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1480       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1481       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1482       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1483       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1484       if (n_vertices) {
1485         Mat B_V,S_VCT;
1486 
1487         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1488         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1489         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1490         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1491         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1492       }
1493       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1494     } else { /* if there are no constraints, reset work */
1495       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1496     }
1497     if (n_vertices && n_R) {
1498       Mat          A_VRT;
1499       PetscScalar  *marray;
1500       PetscBLASInt B_N,B_one = 1;
1501 
1502       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1503       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1504       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1505       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1506       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1507       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1508       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1509     }
1510 
1511     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1512       for (i=0;i<pcbddc->local_primal_size;i++) {
1513         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1514         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1515         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1516         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1517         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1518       }
1519     } else {
1520       for (i=0;i<pcbddc->local_primal_size;i++) {
1521         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1522         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1523         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1524         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1525         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1526       }
1527     }
1528     /* coarse basis functions */
1529     for (i=0;i<pcbddc->local_primal_size;i++) {
1530       PetscScalar *y;
1531 
1532       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1533       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1534       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1535       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537       if (i<n_vertices) {
1538         y[n_B*i+idx_V_B[i]] = 1.0;
1539       }
1540       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1541       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1542 
1543       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1544         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1545         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1546         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1547         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1549         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1550       }
1551       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1552     }
1553   }
1554   /* free memory */
1555   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1556   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1557   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1558   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1559   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1560   ierr = PetscFree(work);CHKERRQ(ierr);
1561   if (n_vertices) {
1562     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1563   }
1564   if (n_constraints) {
1565     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1566   }
1567   /* Checking coarse_sub_mat and coarse basis functios */
1568   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1569   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1570   if (pcbddc->dbg_flag) {
1571     Mat         coarse_sub_mat;
1572     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1573     Mat         coarse_phi_D,coarse_phi_B;
1574     Mat         coarse_psi_D,coarse_psi_B;
1575     Mat         A_II,A_BB,A_IB,A_BI;
1576     Mat         C_B,CPHI;
1577     IS          is_dummy;
1578     Vec         mones;
1579     MatType     checkmattype=MATSEQAIJ;
1580     PetscReal   real_value;
1581 
1582     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1583     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1584     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1585     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1586     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1587     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1588     if (unsymmetric_check) {
1589       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1590       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1591     }
1592     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1593 
1594     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1595     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1596     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1597     if (unsymmetric_check) {
1598       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1599       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1601       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1602       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1603       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1604       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1605       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1607       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1608       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1609       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1610     } else {
1611       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1612       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1613       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1614       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1615       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1616       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1617       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1618       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1619     }
1620     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1621     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1622     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1623     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1624     if (pcbddc->benign_n) {
1625       Mat         B0_I,B0_B,B0_BPHI,B0_IPHI;
1626       PetscScalar *data,*data2;
1627       PetscInt    j;
1628 
1629       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
1630       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1631       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1632       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1633       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1634       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1635       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1636       for (j=0;j<pcbddc->benign_n;j++) {
1637         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
1638         for (i=0;i<pcbddc->local_primal_size;i++) {
1639           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
1640           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
1641         }
1642       }
1643       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1644       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1645       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1646       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1647       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1648       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1649       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1650       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1651       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1652     }
1653 #if 0
1654   {
1655     PetscViewer viewer;
1656     char filename[256];
1657     sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank);
1658     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1659     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1660     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1661     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1662   }
1663 #endif
1664     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1665     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1666     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1667     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1668 
1669     /* check constraints */
1670     if (!pcbddc->benign_n) { /* TODO: add benign case */
1671       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1672       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1673       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1674       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1675       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1676       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1677       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1678       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1679       if (unsymmetric_check) {
1680         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1681         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1682         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1683         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1684         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1685       }
1686       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1687       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1688       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1689       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1690     }
1691     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1692     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1693     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1694     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1695     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1696     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1697     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1698     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1699     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1700     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1701     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1702     if (unsymmetric_check) {
1703       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1704       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1705     }
1706     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1707   }
1708   /* get back data */
1709   *coarse_submat_vals_n = coarse_submat_vals;
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1715 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1716 {
1717   Mat            *work_mat;
1718   IS             isrow_s,iscol_s;
1719   PetscBool      rsorted,csorted;
1720   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1721   PetscErrorCode ierr;
1722 
1723   PetscFunctionBegin;
1724   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1725   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1726   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1727   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1728 
1729   if (!rsorted) {
1730     const PetscInt *idxs;
1731     PetscInt *idxs_sorted,i;
1732 
1733     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1734     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1735     for (i=0;i<rsize;i++) {
1736       idxs_perm_r[i] = i;
1737     }
1738     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1739     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1740     for (i=0;i<rsize;i++) {
1741       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1742     }
1743     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1744     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1745   } else {
1746     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1747     isrow_s = isrow;
1748   }
1749 
1750   if (!csorted) {
1751     if (isrow == iscol) {
1752       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1753       iscol_s = isrow_s;
1754     } else {
1755       const PetscInt *idxs;
1756       PetscInt *idxs_sorted,i;
1757 
1758       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1759       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1760       for (i=0;i<csize;i++) {
1761         idxs_perm_c[i] = i;
1762       }
1763       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1764       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1765       for (i=0;i<csize;i++) {
1766         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1767       }
1768       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1769       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1770     }
1771   } else {
1772     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1773     iscol_s = iscol;
1774   }
1775 
1776   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1777 
1778   if (!rsorted || !csorted) {
1779     Mat      new_mat;
1780     IS       is_perm_r,is_perm_c;
1781 
1782     if (!rsorted) {
1783       PetscInt *idxs_r,i;
1784       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1785       for (i=0;i<rsize;i++) {
1786         idxs_r[idxs_perm_r[i]] = i;
1787       }
1788       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1789       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1790     } else {
1791       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1792     }
1793     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1794 
1795     if (!csorted) {
1796       if (isrow_s == iscol_s) {
1797         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1798         is_perm_c = is_perm_r;
1799       } else {
1800         PetscInt *idxs_c,i;
1801         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1802         for (i=0;i<csize;i++) {
1803           idxs_c[idxs_perm_c[i]] = i;
1804         }
1805         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1806         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1807       }
1808     } else {
1809       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1810     }
1811     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1812 
1813     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1814     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1815     work_mat[0] = new_mat;
1816     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1817     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1818   }
1819 
1820   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1821   *B = work_mat[0];
1822   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1823   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1824   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1830 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1831 {
1832   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1833   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1834   Mat            new_mat;
1835   IS             is_local,is_global;
1836   PetscInt       local_size;
1837   PetscBool      isseqaij;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1842   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1843   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1844   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1845   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1846   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1847   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1848 
1849   /* check */
1850   if (pcbddc->dbg_flag) {
1851     Vec       x,x_change;
1852     PetscReal error;
1853 
1854     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1855     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1856     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1857     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1858     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1859     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1860     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1861     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1862     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1863     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1864     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1865     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1866     ierr = VecDestroy(&x);CHKERRQ(ierr);
1867     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1868   }
1869 
1870   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1871   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1872   if (isseqaij) {
1873     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1874   } else {
1875     Mat work_mat;
1876     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1877     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1878     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1879   }
1880   if (matis->A->symmetric_set) {
1881     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1882 #if !defined(PETSC_USE_COMPLEX)
1883     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1884 #endif
1885   }
1886   /*
1887   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1888   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1889   */
1890   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1896 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1897 {
1898   PC_IS*          pcis = (PC_IS*)(pc->data);
1899   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1900   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1901   PetscInt        *idx_R_local=NULL;
1902   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1903   PetscInt        vbs,bs;
1904   PetscBT         bitmask=NULL;
1905   PetscErrorCode  ierr;
1906 
1907   PetscFunctionBegin;
1908   /*
1909     No need to setup local scatters if
1910       - primal space is unchanged
1911         AND
1912       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1913         AND
1914       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1915   */
1916   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1917     PetscFunctionReturn(0);
1918   }
1919   /* destroy old objects */
1920   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1921   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1922   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1923   /* Set Non-overlapping dimensions */
1924   n_B = pcis->n_B;
1925   n_D = pcis->n - n_B;
1926   n_vertices = pcbddc->n_vertices;
1927 
1928   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1929 
1930   /* create auxiliary bitmask and allocate workspace */
1931   if (!sub_schurs->reuse_mumps) {
1932     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1933     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1934     for (i=0;i<n_vertices;i++) {
1935       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1936     }
1937 
1938     for (i=0, n_R=0; i<pcis->n; i++) {
1939       if (!PetscBTLookup(bitmask,i)) {
1940         idx_R_local[n_R++] = i;
1941       }
1942     }
1943   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1944     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1945 
1946     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1947     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1948   }
1949 
1950   /* Block code */
1951   vbs = 1;
1952   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1953   if (bs>1 && !(n_vertices%bs)) {
1954     PetscBool is_blocked = PETSC_TRUE;
1955     PetscInt  *vary;
1956     if (!sub_schurs->reuse_mumps) {
1957       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1958       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1959       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1960       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1961       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1962       for (i=0; i<pcis->n/bs; i++) {
1963         if (vary[i]!=0 && vary[i]!=bs) {
1964           is_blocked = PETSC_FALSE;
1965           break;
1966         }
1967       }
1968       ierr = PetscFree(vary);CHKERRQ(ierr);
1969     } else {
1970       /* Verify directly the R set */
1971       for (i=0; i<n_R/bs; i++) {
1972         PetscInt j,node=idx_R_local[bs*i];
1973         for (j=1; j<bs; j++) {
1974           if (node != idx_R_local[bs*i+j]-j) {
1975             is_blocked = PETSC_FALSE;
1976             break;
1977           }
1978         }
1979       }
1980     }
1981     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1982       vbs = bs;
1983       for (i=0;i<n_R/vbs;i++) {
1984         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1985       }
1986     }
1987   }
1988   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1989   if (sub_schurs->reuse_mumps) {
1990     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1991 
1992     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1993     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1994     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1995     reuse_mumps->is_R = pcbddc->is_R_local;
1996   } else {
1997     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1998   }
1999 
2000   /* print some info if requested */
2001   if (pcbddc->dbg_flag) {
2002     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2003     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2004     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2005     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
2006     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
2007     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr);
2008     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2009   }
2010 
2011   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
2012   if (!sub_schurs->reuse_mumps) {
2013     IS       is_aux1,is_aux2;
2014     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
2015 
2016     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2017     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
2018     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
2019     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2020     for (i=0; i<n_D; i++) {
2021       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
2022     }
2023     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2024     for (i=0, j=0; i<n_R; i++) {
2025       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
2026         aux_array1[j++] = i;
2027       }
2028     }
2029     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2030     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2031     for (i=0, j=0; i<n_B; i++) {
2032       if (!PetscBTLookup(bitmask,is_indices[i])) {
2033         aux_array2[j++] = i;
2034       }
2035     }
2036     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2037     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
2038     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
2039     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2040     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
2041 
2042     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2043       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
2044       for (i=0, j=0; i<n_R; i++) {
2045         if (PetscBTLookup(bitmask,idx_R_local[i])) {
2046           aux_array1[j++] = i;
2047         }
2048       }
2049       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2050       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2051       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2052     }
2053     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
2054     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2055   } else {
2056     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2057     IS               tis;
2058     PetscInt         schur_size;
2059 
2060     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
2061     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
2062     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
2063     ierr = ISDestroy(&tis);CHKERRQ(ierr);
2064     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2065       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
2066       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2067       ierr = ISDestroy(&tis);CHKERRQ(ierr);
2068     }
2069   }
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
2076 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
2077 {
2078   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2079   PC_IS          *pcis = (PC_IS*)pc->data;
2080   PC             pc_temp;
2081   Mat            A_RR;
2082   MatReuse       reuse;
2083   PetscScalar    m_one = -1.0;
2084   PetscReal      value;
2085   PetscInt       n_D,n_R;
2086   PetscBool      use_exact,use_exact_reduced,issbaij;
2087   PetscErrorCode ierr;
2088   /* prefixes stuff */
2089   char           dir_prefix[256],neu_prefix[256],str_level[16];
2090   size_t         len;
2091 
2092   PetscFunctionBegin;
2093 
2094   /* compute prefixes */
2095   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
2096   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
2097   if (!pcbddc->current_level) {
2098     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2099     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2100     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2101     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2102   } else {
2103     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2104     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
2105     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2106     len -= 15; /* remove "pc_bddc_coarse_" */
2107     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
2108     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
2109     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2110     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2111     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2112     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2113     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
2114     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
2115   }
2116 
2117   /* DIRICHLET PROBLEM */
2118   if (dirichlet) {
2119     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2120     if (pcbddc->local_mat->symmetric_set) {
2121       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2122     }
2123     /* Matrix for Dirichlet problem is pcis->A_II */
2124     n_D = pcis->n - pcis->n_B;
2125     if (!pcbddc->ksp_D) { /* create object if not yet build */
2126       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
2127       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
2128       /* default */
2129       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
2130       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
2131       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2132       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2133       if (issbaij) {
2134         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2135       } else {
2136         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2137       }
2138       /* Allow user's customization */
2139       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
2140       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2141     }
2142     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
2143     if (sub_schurs->reuse_mumps) {
2144       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2145 
2146       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
2147     }
2148     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2149     if (!n_D) {
2150       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2151       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2152     }
2153     /* Set Up KSP for Dirichlet problem of BDDC */
2154     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
2155     /* set ksp_D into pcis data */
2156     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
2157     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
2158     pcis->ksp_D = pcbddc->ksp_D;
2159   }
2160 
2161   /* NEUMANN PROBLEM */
2162   A_RR = 0;
2163   if (neumann) {
2164     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2165     PetscInt        ibs,mbs;
2166     PetscBool       issbaij;
2167     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
2168     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
2169     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
2170     if (pcbddc->ksp_R) { /* already created ksp */
2171       PetscInt nn_R;
2172       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
2173       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2174       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
2175       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
2176         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
2177         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2178         reuse = MAT_INITIAL_MATRIX;
2179       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
2180         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
2181           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2182           reuse = MAT_INITIAL_MATRIX;
2183         } else { /* safe to reuse the matrix */
2184           reuse = MAT_REUSE_MATRIX;
2185         }
2186       }
2187       /* last check */
2188       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
2189         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2190         reuse = MAT_INITIAL_MATRIX;
2191       }
2192     } else { /* first time, so we need to create the matrix */
2193       reuse = MAT_INITIAL_MATRIX;
2194     }
2195     /* extract A_RR */
2196     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
2197     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
2198     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2199     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
2200       if (matis->A == pcbddc->local_mat) {
2201         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2202         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2203       } else {
2204         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2205       }
2206     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
2207       if (matis->A == pcbddc->local_mat) {
2208         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2209         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2210       } else {
2211         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2212       }
2213     }
2214     if (!sub_schurs->reuse_mumps) {
2215       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
2216       if (pcbddc->local_mat->symmetric_set) {
2217         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2218       }
2219     } else {
2220       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2221 
2222       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2223       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
2224       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2225     }
2226     if (!pcbddc->ksp_R) { /* create object if not present */
2227       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
2228       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
2229       /* default */
2230       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
2231       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
2232       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2233       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2234       if (issbaij) {
2235         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2236       } else {
2237         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2238       }
2239       /* Allow user's customization */
2240       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
2241       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2242     }
2243     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2244     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2245     if (!n_R) {
2246       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2247       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2248     }
2249     /* Reuse MUMPS solver if it is present */
2250     if (sub_schurs->reuse_mumps) {
2251       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2252 
2253       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2254     }
2255     /* Set Up KSP for Neumann problem of BDDC */
2256     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2257   }
2258   /* free Neumann problem's matrix */
2259   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2260 
2261   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2262   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2263     if (pcbddc->dbg_flag) {
2264       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2265       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2266       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2267     }
2268     if (dirichlet) { /* Dirichlet */
2269       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2270       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2271       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2272       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2273       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2274       /* need to be adapted? */
2275       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2276       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2277       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2278       /* print info */
2279       if (pcbddc->dbg_flag) {
2280         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2281         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2282       }
2283       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2284         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2285       }
2286     }
2287     if (neumann) { /* Neumann */
2288       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2289       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2290       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2291       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2292       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2293       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2294       /* need to be adapted? */
2295       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2296       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2297       /* print info */
2298       if (pcbddc->dbg_flag) {
2299         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2300         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2301       }
2302       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2303         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2304       }
2305     }
2306   }
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2312 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2313 {
2314   PetscErrorCode  ierr;
2315   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2316   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2317 
2318   PetscFunctionBegin;
2319   if (!sub_schurs->reuse_mumps) {
2320     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2321   }
2322   if (!pcbddc->switch_static) {
2323     if (applytranspose && pcbddc->local_auxmat1) {
2324       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2325       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2326     }
2327     if (!sub_schurs->reuse_mumps) {
2328       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2329       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2330     } else {
2331       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2332 
2333       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2334       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2335     }
2336   } else {
2337     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2338     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2339     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2340     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2341     if (applytranspose && pcbddc->local_auxmat1) {
2342       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2343       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2344       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2345       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2346     }
2347   }
2348   if (!sub_schurs->reuse_mumps) {
2349     if (applytranspose) {
2350       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2351     } else {
2352       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2353     }
2354 #if defined(PETSC_HAVE_MUMPS)
2355   } else {
2356     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2357 
2358     if (applytranspose) {
2359       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2360     } else {
2361       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2362     }
2363 #endif
2364   }
2365   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2366   if (!pcbddc->switch_static) {
2367     if (!sub_schurs->reuse_mumps) {
2368       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2369       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2370     } else {
2371       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2372 
2373       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2374       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2375     }
2376     if (!applytranspose && pcbddc->local_auxmat1) {
2377       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2378       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2379     }
2380   } else {
2381     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2382     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2383     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2384     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2385     if (!applytranspose && pcbddc->local_auxmat1) {
2386       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2387       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2388     }
2389     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2390     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2391     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2392     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2393   }
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2398 #undef __FUNCT__
2399 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2400 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2401 {
2402   PetscErrorCode ierr;
2403   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2404   PC_IS*            pcis = (PC_IS*)  (pc->data);
2405   const PetscScalar zero = 0.0;
2406 
2407   PetscFunctionBegin;
2408   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2409   if (applytranspose) {
2410     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2411     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2412   } else {
2413     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2414     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2415   }
2416 
2417   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2418   if (pcbddc->benign_n) {
2419     PetscScalar *array;
2420     PetscInt    j;
2421 
2422     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2423     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
2424     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2425   }
2426 
2427   /* start communications from local primal nodes to rhs of coarse solver */
2428   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2429   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2430   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2431 
2432   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2433   /* TODO remove null space when doing multilevel */
2434   if (pcbddc->coarse_ksp) {
2435     Mat coarse_mat;
2436     Vec rhs,sol;
2437     MatNullSpace nullsp;
2438 
2439     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2440     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2441     ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
2442     ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr);
2443     if (nullsp) {
2444       ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr);
2445     }
2446     if (applytranspose) {
2447       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2448     } else {
2449       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2450     }
2451     if (nullsp) {
2452       ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr);
2453     }
2454   }
2455 
2456   /* Local solution on R nodes */
2457   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2458     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2459   }
2460 
2461   /* communications from coarse sol to local primal nodes */
2462   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2463   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2464 
2465   /* Sum contributions from two levels */
2466   if (applytranspose) {
2467     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2468     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2469   } else {
2470     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2471     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2472   }
2473   /* store p0 */
2474   if (pcbddc->benign_n) {
2475     PetscScalar *array;
2476     PetscInt    j;
2477 
2478     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2479     for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
2480     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2481   }
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2487 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2488 {
2489   PetscErrorCode ierr;
2490   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2491   PetscScalar    *array;
2492   Vec            from,to;
2493 
2494   PetscFunctionBegin;
2495   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2496     from = pcbddc->coarse_vec;
2497     to = pcbddc->vec1_P;
2498     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2499       Vec tvec;
2500 
2501       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2502       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2503       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2504       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2505       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2506       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2507     }
2508   } else { /* from local to global -> put data in coarse right hand side */
2509     from = pcbddc->vec1_P;
2510     to = pcbddc->coarse_vec;
2511   }
2512   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2518 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2519 {
2520   PetscErrorCode ierr;
2521   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2522   PetscScalar    *array;
2523   Vec            from,to;
2524 
2525   PetscFunctionBegin;
2526   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2527     from = pcbddc->coarse_vec;
2528     to = pcbddc->vec1_P;
2529   } else { /* from local to global -> put data in coarse right hand side */
2530     from = pcbddc->vec1_P;
2531     to = pcbddc->coarse_vec;
2532   }
2533   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2534   if (smode == SCATTER_FORWARD) {
2535     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2536       Vec tvec;
2537 
2538       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2539       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2540       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2541       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2542     }
2543   } else {
2544     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2545      ierr = VecResetArray(from);CHKERRQ(ierr);
2546     }
2547   }
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 /* uncomment for testing purposes */
2552 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2553 #undef __FUNCT__
2554 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2555 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2556 {
2557   PetscErrorCode    ierr;
2558   PC_IS*            pcis = (PC_IS*)(pc->data);
2559   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2560   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2561   /* one and zero */
2562   PetscScalar       one=1.0,zero=0.0;
2563   /* space to store constraints and their local indices */
2564   PetscScalar       *constraints_data;
2565   PetscInt          *constraints_idxs,*constraints_idxs_B;
2566   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2567   PetscInt          *constraints_n;
2568   /* iterators */
2569   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2570   /* BLAS integers */
2571   PetscBLASInt      lwork,lierr;
2572   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2573   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2574   /* reuse */
2575   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2576   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2577   /* change of basis */
2578   PetscBool         qr_needed;
2579   PetscBT           change_basis,qr_needed_idx;
2580   /* auxiliary stuff */
2581   PetscInt          *nnz,*is_indices;
2582   PetscInt          ncc;
2583   /* some quantities */
2584   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2585   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2586 
2587   PetscFunctionBegin;
2588   /* Destroy Mat objects computed previously */
2589   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2590   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2591   /* save info on constraints from previous setup (if any) */
2592   olocal_primal_size = pcbddc->local_primal_size;
2593   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2594   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2595   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2596   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2597   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2598   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2599 
2600   /* print some info */
2601   if (pcbddc->dbg_flag) {
2602     IS       vertices;
2603     PetscInt nv,nedges,nfaces;
2604     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2605     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2606     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2607     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2608     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2609     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2610     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2612     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2613   }
2614 
2615   if (!pcbddc->adaptive_selection) {
2616     IS           ISForVertices,*ISForFaces,*ISForEdges;
2617     MatNullSpace nearnullsp;
2618     const Vec    *nearnullvecs;
2619     Vec          *localnearnullsp;
2620     PetscScalar  *array;
2621     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2622     PetscBool    nnsp_has_cnst;
2623     /* LAPACK working arrays for SVD or POD */
2624     PetscBool    skip_lapack,boolforchange;
2625     PetscScalar  *work;
2626     PetscReal    *singular_vals;
2627 #if defined(PETSC_USE_COMPLEX)
2628     PetscReal    *rwork;
2629 #endif
2630 #if defined(PETSC_MISSING_LAPACK_GESVD)
2631     PetscScalar  *temp_basis,*correlation_mat;
2632 #else
2633     PetscBLASInt dummy_int=1;
2634     PetscScalar  dummy_scalar=1.;
2635 #endif
2636 
2637     /* Get index sets for faces, edges and vertices from graph */
2638     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2639     /* free unneeded index sets */
2640     if (!pcbddc->use_vertices) {
2641       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2642     }
2643     if (!pcbddc->use_edges) {
2644       for (i=0;i<n_ISForEdges;i++) {
2645         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2646       }
2647       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2648       n_ISForEdges = 0;
2649     }
2650     if (!pcbddc->use_faces) {
2651       for (i=0;i<n_ISForFaces;i++) {
2652         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2653       }
2654       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2655       n_ISForFaces = 0;
2656     }
2657 
2658 #if defined(PETSC_USE_DEBUG)
2659     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2660        Also use_change_of_basis should be consistent among processors */
2661     if (pcbddc->NullSpace) {
2662       PetscBool tbool[2],gbool[2];
2663 
2664       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2665         pcbddc->use_change_of_basis = PETSC_TRUE;
2666         if (!ISForEdges) {
2667           pcbddc->use_change_on_faces = PETSC_TRUE;
2668         }
2669       }
2670       tbool[0] = pcbddc->use_change_of_basis;
2671       tbool[1] = pcbddc->use_change_on_faces;
2672       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2673       pcbddc->use_change_of_basis = gbool[0];
2674       pcbddc->use_change_on_faces = gbool[1];
2675     }
2676 #endif
2677 
2678     /* check if near null space is attached to global mat */
2679     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2680     if (nearnullsp) {
2681       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2682       /* remove any stored info */
2683       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2684       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2685       /* store information for BDDC solver reuse */
2686       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2687       pcbddc->onearnullspace = nearnullsp;
2688       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2689       for (i=0;i<nnsp_size;i++) {
2690         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2691       }
2692     } else { /* if near null space is not provided BDDC uses constants by default */
2693       nnsp_size = 0;
2694       nnsp_has_cnst = PETSC_TRUE;
2695     }
2696     /* get max number of constraints on a single cc */
2697     max_constraints = nnsp_size;
2698     if (nnsp_has_cnst) max_constraints++;
2699 
2700     /*
2701          Evaluate maximum storage size needed by the procedure
2702          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2703          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2704          There can be multiple constraints per connected component
2705                                                                                                                                                            */
2706     n_vertices = 0;
2707     if (ISForVertices) {
2708       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2709     }
2710     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2711     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2712 
2713     total_counts = n_ISForFaces+n_ISForEdges;
2714     total_counts *= max_constraints;
2715     total_counts += n_vertices;
2716     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2717 
2718     total_counts = 0;
2719     max_size_of_constraint = 0;
2720     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2721       IS used_is;
2722       if (i<n_ISForEdges) {
2723         used_is = ISForEdges[i];
2724       } else {
2725         used_is = ISForFaces[i-n_ISForEdges];
2726       }
2727       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2728       total_counts += j;
2729       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2730     }
2731     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2732 
2733     /* get local part of global near null space vectors */
2734     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2735     for (k=0;k<nnsp_size;k++) {
2736       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2737       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2738       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2739     }
2740 
2741     /* whether or not to skip lapack calls */
2742     skip_lapack = PETSC_TRUE;
2743     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2744 
2745     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2746     if (!skip_lapack) {
2747       PetscScalar temp_work;
2748 
2749 #if defined(PETSC_MISSING_LAPACK_GESVD)
2750       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2751       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2752       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2753       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2754 #if defined(PETSC_USE_COMPLEX)
2755       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2756 #endif
2757       /* now we evaluate the optimal workspace using query with lwork=-1 */
2758       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2759       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2760       lwork = -1;
2761       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2762 #if !defined(PETSC_USE_COMPLEX)
2763       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2764 #else
2765       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2766 #endif
2767       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2768       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2769 #else /* on missing GESVD */
2770       /* SVD */
2771       PetscInt max_n,min_n;
2772       max_n = max_size_of_constraint;
2773       min_n = max_constraints;
2774       if (max_size_of_constraint < max_constraints) {
2775         min_n = max_size_of_constraint;
2776         max_n = max_constraints;
2777       }
2778       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2779 #if defined(PETSC_USE_COMPLEX)
2780       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2781 #endif
2782       /* now we evaluate the optimal workspace using query with lwork=-1 */
2783       lwork = -1;
2784       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2785       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2786       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2787       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2788 #if !defined(PETSC_USE_COMPLEX)
2789       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2790 #else
2791       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2792 #endif
2793       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2794       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2795 #endif /* on missing GESVD */
2796       /* Allocate optimal workspace */
2797       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2798       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2799     }
2800     /* Now we can loop on constraining sets */
2801     total_counts = 0;
2802     constraints_idxs_ptr[0] = 0;
2803     constraints_data_ptr[0] = 0;
2804     /* vertices */
2805     if (n_vertices) {
2806       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2807       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2808       for (i=0;i<n_vertices;i++) {
2809         constraints_n[total_counts] = 1;
2810         constraints_data[total_counts] = 1.0;
2811         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2812         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2813         total_counts++;
2814       }
2815       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2816       n_vertices = total_counts;
2817     }
2818 
2819     /* edges and faces */
2820     total_counts_cc = total_counts;
2821     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2822       IS        used_is;
2823       PetscBool idxs_copied = PETSC_FALSE;
2824 
2825       if (ncc<n_ISForEdges) {
2826         used_is = ISForEdges[ncc];
2827         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2828       } else {
2829         used_is = ISForFaces[ncc-n_ISForEdges];
2830         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2831       }
2832       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2833 
2834       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2835       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2836       /* change of basis should not be performed on local periodic nodes */
2837       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2838       if (nnsp_has_cnst) {
2839         PetscScalar quad_value;
2840 
2841         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2842         idxs_copied = PETSC_TRUE;
2843 
2844         if (!pcbddc->use_nnsp_true) {
2845           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2846         } else {
2847           quad_value = 1.0;
2848         }
2849         for (j=0;j<size_of_constraint;j++) {
2850           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2851         }
2852         temp_constraints++;
2853         total_counts++;
2854       }
2855       for (k=0;k<nnsp_size;k++) {
2856         PetscReal real_value;
2857         PetscScalar *ptr_to_data;
2858 
2859         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2860         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2861         for (j=0;j<size_of_constraint;j++) {
2862           ptr_to_data[j] = array[is_indices[j]];
2863         }
2864         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2865         /* check if array is null on the connected component */
2866         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2867         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2868         if (real_value > 0.0) { /* keep indices and values */
2869           temp_constraints++;
2870           total_counts++;
2871           if (!idxs_copied) {
2872             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2873             idxs_copied = PETSC_TRUE;
2874           }
2875         }
2876       }
2877       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2878       valid_constraints = temp_constraints;
2879       if (!pcbddc->use_nnsp_true && temp_constraints) {
2880         if (temp_constraints == 1) { /* just normalize the constraint */
2881           PetscScalar norm,*ptr_to_data;
2882 
2883           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2884           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2885           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2886           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2887           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2888         } else { /* perform SVD */
2889           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2890           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2891 
2892 #if defined(PETSC_MISSING_LAPACK_GESVD)
2893           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2894              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2895              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2896                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2897                 from that computed using LAPACKgesvd
2898              -> This is due to a different computation of eigenvectors in LAPACKheev
2899              -> The quality of the POD-computed basis will be the same */
2900           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2901           /* Store upper triangular part of correlation matrix */
2902           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2903           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2904           for (j=0;j<temp_constraints;j++) {
2905             for (k=0;k<j+1;k++) {
2906               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2907             }
2908           }
2909           /* compute eigenvalues and eigenvectors of correlation matrix */
2910           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2911           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2912 #if !defined(PETSC_USE_COMPLEX)
2913           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2914 #else
2915           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2916 #endif
2917           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2918           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2919           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2920           j = 0;
2921           while (j < temp_constraints && singular_vals[j] < tol) j++;
2922           total_counts = total_counts-j;
2923           valid_constraints = temp_constraints-j;
2924           /* scale and copy POD basis into used quadrature memory */
2925           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2926           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2927           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2928           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2929           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2930           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2931           if (j<temp_constraints) {
2932             PetscInt ii;
2933             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2934             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2935             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2936             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2937             for (k=0;k<temp_constraints-j;k++) {
2938               for (ii=0;ii<size_of_constraint;ii++) {
2939                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2940               }
2941             }
2942           }
2943 #else  /* on missing GESVD */
2944           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2945           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2946           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2947           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2948 #if !defined(PETSC_USE_COMPLEX)
2949           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2950 #else
2951           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2952 #endif
2953           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2954           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2955           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2956           k = temp_constraints;
2957           if (k > size_of_constraint) k = size_of_constraint;
2958           j = 0;
2959           while (j < k && singular_vals[k-j-1] < tol) j++;
2960           valid_constraints = k-j;
2961           total_counts = total_counts-temp_constraints+valid_constraints;
2962 #endif /* on missing GESVD */
2963         }
2964       }
2965       /* update pointers information */
2966       if (valid_constraints) {
2967         constraints_n[total_counts_cc] = valid_constraints;
2968         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2969         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2970         /* set change_of_basis flag */
2971         if (boolforchange) {
2972           PetscBTSet(change_basis,total_counts_cc);
2973         }
2974         total_counts_cc++;
2975       }
2976     }
2977     /* free workspace */
2978     if (!skip_lapack) {
2979       ierr = PetscFree(work);CHKERRQ(ierr);
2980 #if defined(PETSC_USE_COMPLEX)
2981       ierr = PetscFree(rwork);CHKERRQ(ierr);
2982 #endif
2983       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2984 #if defined(PETSC_MISSING_LAPACK_GESVD)
2985       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2986       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2987 #endif
2988     }
2989     for (k=0;k<nnsp_size;k++) {
2990       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2991     }
2992     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2993     /* free index sets of faces, edges and vertices */
2994     for (i=0;i<n_ISForFaces;i++) {
2995       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2996     }
2997     if (n_ISForFaces) {
2998       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2999     }
3000     for (i=0;i<n_ISForEdges;i++) {
3001       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
3002     }
3003     if (n_ISForEdges) {
3004       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
3005     }
3006     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
3007   } else {
3008     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3009 
3010     total_counts = 0;
3011     n_vertices = 0;
3012     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3013       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
3014     }
3015     max_constraints = 0;
3016     total_counts_cc = 0;
3017     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3018       total_counts += pcbddc->adaptive_constraints_n[i];
3019       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
3020       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
3021     }
3022     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
3023     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
3024     constraints_idxs = pcbddc->adaptive_constraints_idxs;
3025     constraints_data = pcbddc->adaptive_constraints_data;
3026     /* constraints_n differs from pcbddc->adaptive_constraints_n */
3027     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
3028     total_counts_cc = 0;
3029     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3030       if (pcbddc->adaptive_constraints_n[i]) {
3031         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
3032       }
3033     }
3034 #if 0
3035     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
3036     for (i=0;i<total_counts_cc;i++) {
3037       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
3038       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
3039       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
3040         printf(" %d",constraints_idxs[j]);
3041       }
3042       printf("\n");
3043       printf("number of cc: %d\n",constraints_n[i]);
3044     }
3045     for (i=0;i<n_vertices;i++) {
3046       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
3047     }
3048     for (i=0;i<sub_schurs->n_subs;i++) {
3049       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
3050     }
3051 #endif
3052 
3053     max_size_of_constraint = 0;
3054     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
3055     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
3056     /* Change of basis */
3057     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
3058     if (pcbddc->use_change_of_basis) {
3059       for (i=0;i<sub_schurs->n_subs;i++) {
3060         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
3061           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
3062         }
3063       }
3064     }
3065   }
3066   pcbddc->local_primal_size = total_counts;
3067   ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3068 
3069   /* map constraints_idxs in boundary numbering */
3070   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
3071   if (i != constraints_idxs_ptr[total_counts_cc]) {
3072     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
3073   }
3074 
3075   /* Create constraint matrix */
3076   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
3077   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
3078   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
3079 
3080   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
3081   /* determine if a QR strategy is needed for change of basis */
3082   qr_needed = PETSC_FALSE;
3083   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
3084   total_primal_vertices=0;
3085   pcbddc->local_primal_size_cc = 0;
3086   for (i=0;i<total_counts_cc;i++) {
3087     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3088     if (size_of_constraint == 1) {
3089       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
3090       pcbddc->local_primal_size_cc += 1;
3091     } else if (PetscBTLookup(change_basis,i)) {
3092       for (k=0;k<constraints_n[i];k++) {
3093         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3094       }
3095       pcbddc->local_primal_size_cc += constraints_n[i];
3096       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
3097         PetscBTSet(qr_needed_idx,i);
3098         qr_needed = PETSC_TRUE;
3099       }
3100     } else {
3101       pcbddc->local_primal_size_cc += 1;
3102     }
3103   }
3104   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
3105   pcbddc->n_vertices = total_primal_vertices;
3106   /* permute indices in order to have a sorted set of vertices */
3107   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3108 
3109   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
3110   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
3111   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
3112 
3113   /* nonzero structure of constraint matrix */
3114   /* and get reference dof for local constraints */
3115   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
3116   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
3117 
3118   j = total_primal_vertices;
3119   total_counts = total_primal_vertices;
3120   cum = total_primal_vertices;
3121   for (i=n_vertices;i<total_counts_cc;i++) {
3122     if (!PetscBTLookup(change_basis,i)) {
3123       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
3124       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
3125       cum++;
3126       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3127       for (k=0;k<constraints_n[i];k++) {
3128         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3129         nnz[j+k] = size_of_constraint;
3130       }
3131       j += constraints_n[i];
3132     }
3133   }
3134   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
3135   ierr = PetscFree(nnz);CHKERRQ(ierr);
3136 
3137   /* set values in constraint matrix */
3138   for (i=0;i<total_primal_vertices;i++) {
3139     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3140   }
3141   total_counts = total_primal_vertices;
3142   for (i=n_vertices;i<total_counts_cc;i++) {
3143     if (!PetscBTLookup(change_basis,i)) {
3144       PetscInt *cols;
3145 
3146       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3147       cols = constraints_idxs+constraints_idxs_ptr[i];
3148       for (k=0;k<constraints_n[i];k++) {
3149         PetscInt    row = total_counts+k;
3150         PetscScalar *vals;
3151 
3152         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
3153         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3154       }
3155       total_counts += constraints_n[i];
3156     }
3157   }
3158   /* assembling */
3159   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161 
3162   /*
3163   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3164   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
3165   */
3166   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
3167   if (pcbddc->use_change_of_basis) {
3168     /* dual and primal dofs on a single cc */
3169     PetscInt     dual_dofs,primal_dofs;
3170     /* working stuff for GEQRF */
3171     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
3172     PetscBLASInt lqr_work;
3173     /* working stuff for UNGQR */
3174     PetscScalar  *gqr_work,lgqr_work_t;
3175     PetscBLASInt lgqr_work;
3176     /* working stuff for TRTRS */
3177     PetscScalar  *trs_rhs;
3178     PetscBLASInt Blas_NRHS;
3179     /* pointers for values insertion into change of basis matrix */
3180     PetscInt     *start_rows,*start_cols;
3181     PetscScalar  *start_vals;
3182     /* working stuff for values insertion */
3183     PetscBT      is_primal;
3184     PetscInt     *aux_primal_numbering_B;
3185     /* matrix sizes */
3186     PetscInt     global_size,local_size;
3187     /* temporary change of basis */
3188     Mat          localChangeOfBasisMatrix;
3189     /* extra space for debugging */
3190     PetscScalar  *dbg_work;
3191 
3192     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
3193     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
3194     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3195     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
3196     /* nonzeros for local mat */
3197     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
3198     for (i=0;i<pcis->n;i++) nnz[i]=1;
3199     for (i=n_vertices;i<total_counts_cc;i++) {
3200       if (PetscBTLookup(change_basis,i)) {
3201         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3202         if (PetscBTLookup(qr_needed_idx,i)) {
3203           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
3204         } else {
3205           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
3206           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
3207         }
3208       }
3209     }
3210     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
3211     ierr = PetscFree(nnz);CHKERRQ(ierr);
3212     /* Set initial identity in the matrix */
3213     for (i=0;i<pcis->n;i++) {
3214       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
3215     }
3216 
3217     if (pcbddc->dbg_flag) {
3218       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
3219       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3220     }
3221 
3222 
3223     /* Now we loop on the constraints which need a change of basis */
3224     /*
3225        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
3226        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
3227 
3228        Basic blocks of change of basis matrix T computed by
3229 
3230           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
3231 
3232             | 1        0   ...        0         s_1/S |
3233             | 0        1   ...        0         s_2/S |
3234             |              ...                        |
3235             | 0        ...            1     s_{n-1}/S |
3236             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
3237 
3238             with S = \sum_{i=1}^n s_i^2
3239             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
3240                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
3241 
3242           - QR decomposition of constraints otherwise
3243     */
3244     if (qr_needed) {
3245       /* space to store Q */
3246       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
3247       /* first we issue queries for optimal work */
3248       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3249       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
3250       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3251       lqr_work = -1;
3252       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
3253       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3254       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3255       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3256       lgqr_work = -1;
3257       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3258       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3259       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3260       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3261       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3262       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3263       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3264       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3265       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3266       /* array to store scaling factors for reflectors */
3267       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3268       /* array to store rhs and solution of triangular solver */
3269       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3270       /* allocating workspace for check */
3271       if (pcbddc->dbg_flag) {
3272         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3273       }
3274     }
3275     /* array to store whether a node is primal or not */
3276     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3277     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3278     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3279     if (i != total_primal_vertices) {
3280       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3281     }
3282     for (i=0;i<total_primal_vertices;i++) {
3283       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3284     }
3285     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3286 
3287     /* loop on constraints and see whether or not they need a change of basis and compute it */
3288     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3289       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3290       if (PetscBTLookup(change_basis,total_counts)) {
3291         /* get constraint info */
3292         primal_dofs = constraints_n[total_counts];
3293         dual_dofs = size_of_constraint-primal_dofs;
3294 
3295         if (pcbddc->dbg_flag) {
3296           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3297         }
3298 
3299         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3300 
3301           /* copy quadrature constraints for change of basis check */
3302           if (pcbddc->dbg_flag) {
3303             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3304           }
3305           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3306           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3307 
3308           /* compute QR decomposition of constraints */
3309           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3310           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3311           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3312           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3313           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3314           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3315           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3316 
3317           /* explictly compute R^-T */
3318           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3319           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3320           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3321           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3322           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3323           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3324           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3325           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3326           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3327           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3328 
3329           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3330           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3331           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3332           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3333           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3334           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3335           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3336           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3337           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3338 
3339           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3340              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3341              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3342           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3343           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3344           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3345           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3346           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3347           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3348           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3349           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3350           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3351           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3352 
3353           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3354           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3355           /* insert cols for primal dofs */
3356           for (j=0;j<primal_dofs;j++) {
3357             start_vals = &qr_basis[j*size_of_constraint];
3358             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3359             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3360           }
3361           /* insert cols for dual dofs */
3362           for (j=0,k=0;j<dual_dofs;k++) {
3363             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3364               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3365               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3366               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3367               j++;
3368             }
3369           }
3370 
3371           /* check change of basis */
3372           if (pcbddc->dbg_flag) {
3373             PetscInt   ii,jj;
3374             PetscBool valid_qr=PETSC_TRUE;
3375             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3376             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3377             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3378             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3379             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3380             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3381             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3382             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3383             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3384             for (jj=0;jj<size_of_constraint;jj++) {
3385               for (ii=0;ii<primal_dofs;ii++) {
3386                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3387                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3388               }
3389             }
3390             if (!valid_qr) {
3391               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3392               for (jj=0;jj<size_of_constraint;jj++) {
3393                 for (ii=0;ii<primal_dofs;ii++) {
3394                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3395                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3396                   }
3397                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3398                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3399                   }
3400                 }
3401               }
3402             } else {
3403               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3404             }
3405           }
3406         } else { /* simple transformation block */
3407           PetscInt    row,col;
3408           PetscScalar val,norm;
3409 
3410           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3411           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3412           for (j=0;j<size_of_constraint;j++) {
3413             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3414             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3415             if (!PetscBTLookup(is_primal,row_B)) {
3416               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3417               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3418               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3419             } else {
3420               for (k=0;k<size_of_constraint;k++) {
3421                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3422                 if (row != col) {
3423                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3424                 } else {
3425                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3426                 }
3427                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3428               }
3429             }
3430           }
3431           if (pcbddc->dbg_flag) {
3432             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3433           }
3434         }
3435       } else {
3436         if (pcbddc->dbg_flag) {
3437           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3438         }
3439       }
3440     }
3441 
3442     /* free workspace */
3443     if (qr_needed) {
3444       if (pcbddc->dbg_flag) {
3445         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3446       }
3447       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3448       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3449       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3450       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3451       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3452     }
3453     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3454     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3455     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3456 
3457     /* assembling of global change of variable */
3458     {
3459       Mat      tmat;
3460       PetscInt bs;
3461 
3462       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3463       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3464       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3465       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3466       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3467       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3468       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3469       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3470       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3471       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3472       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3473       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3474       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3475       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3476       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3477       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3478       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3479       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3480     }
3481     /* check */
3482     if (pcbddc->dbg_flag) {
3483       PetscReal error;
3484       Vec       x,x_change;
3485 
3486       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3487       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3488       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3489       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3490       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3491       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3492       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3493       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3494       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3495       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3496       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3497       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3498       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3499       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3500       ierr = VecDestroy(&x);CHKERRQ(ierr);
3501       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3502     }
3503 
3504     /* adapt sub_schurs computed (if any) */
3505     if (pcbddc->use_deluxe_scaling) {
3506       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3507       if (sub_schurs->S_Ej_all) {
3508         Mat                    S_new,tmat;
3509         ISLocalToGlobalMapping NtoSall;
3510         IS                     is_all_N,is_V,is_V_Sall;
3511         const PetscScalar      *array;
3512         const PetscInt         *idxs_V,*idxs_all;
3513         PetscInt               i,n_V;
3514 
3515         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3516         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3517         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3518         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3519         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3520         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3521         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3522         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3523         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3524         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3525         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3526         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3527         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3528         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3529         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3530         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3531         for (i=0;i<n_V;i++) {
3532           PetscScalar val;
3533           PetscInt    idx;
3534 
3535           idx = idxs_V[i];
3536           val = array[idxs_all[idxs_V[i]]];
3537           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3538         }
3539         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3540         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3541         sub_schurs->S_Ej_all = S_new;
3542         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3543         if (sub_schurs->sum_S_Ej_all) {
3544           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3545           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3546           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3547           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3548           sub_schurs->sum_S_Ej_all = S_new;
3549           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3550         }
3551         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3552         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3553         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3554         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3555         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3556       }
3557     }
3558     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3559   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3560     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3561     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3562   }
3563 
3564   /* set up change of basis context */
3565   if (pcbddc->ChangeOfBasisMatrix) {
3566     PCBDDCChange_ctx change_ctx;
3567 
3568     if (!pcbddc->new_global_mat) {
3569       PetscInt global_size,local_size;
3570 
3571       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3572       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3573       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3574       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3575       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3576       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3577       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3578       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3579       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3580     } else {
3581       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3582       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3583       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3584     }
3585     if (!pcbddc->user_ChangeOfBasisMatrix) {
3586       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3587       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3588     } else {
3589       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3590       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3591     }
3592     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3593     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3594     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3595     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3596   }
3597 
3598   /* add pressure dofs to set of primal nodes for numbering purposes */
3599   for (i=0;i<pcbddc->benign_n;i++) {
3600     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
3601     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
3602     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3603     pcbddc->local_primal_size_cc++;
3604     pcbddc->local_primal_size++;
3605   }
3606 
3607   /* check if a new primal space has been introduced (also take into account benign trick) */
3608   pcbddc->new_primal_space_local = PETSC_TRUE;
3609   if (olocal_primal_size == pcbddc->local_primal_size) {
3610     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3611     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3612     if (!pcbddc->new_primal_space_local) {
3613       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3614       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3615     }
3616   }
3617   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3618   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3619   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3620 
3621   /* flush dbg viewer */
3622   if (pcbddc->dbg_flag) {
3623     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3624   }
3625 
3626   /* free workspace */
3627   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3628   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3629   if (!pcbddc->adaptive_selection) {
3630     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3631     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3632   } else {
3633     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3634                       pcbddc->adaptive_constraints_idxs_ptr,
3635                       pcbddc->adaptive_constraints_data_ptr,
3636                       pcbddc->adaptive_constraints_idxs,
3637                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3638     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3639     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3640   }
3641   PetscFunctionReturn(0);
3642 }
3643 
3644 #undef __FUNCT__
3645 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3646 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3647 {
3648   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3649   PC_IS       *pcis = (PC_IS*)pc->data;
3650   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3651   PetscInt    ierr,i,vertex_size,N;
3652   PetscViewer viewer=pcbddc->dbg_viewer;
3653 
3654   PetscFunctionBegin;
3655   /* Reset previously computed graph */
3656   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3657   /* Init local Graph struct */
3658   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3659   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3660 
3661   /* Check validity of the csr graph passed in by the user */
3662   if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3663     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
3664   }
3665 
3666   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3667   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3668     PetscInt  *xadj,*adjncy;
3669     PetscInt  nvtxs;
3670     PetscBool flg_row=PETSC_FALSE;
3671 
3672     if (pcbddc->use_local_adj) {
3673 
3674       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3675       if (flg_row) {
3676         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3677         pcbddc->computed_rowadj = PETSC_TRUE;
3678       }
3679       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3680     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3681       IS                     is_dummy;
3682       ISLocalToGlobalMapping l2gmap_dummy;
3683       PetscInt               j,sum;
3684       PetscInt               *cxadj,*cadjncy;
3685       const PetscInt         *idxs;
3686       PCBDDCGraph            graph;
3687       PetscBT                is_on_boundary;
3688 
3689       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3690       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3691       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3692       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3693       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3694       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3695       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3696       if (flg_row) {
3697         graph->xadj = xadj;
3698         graph->adjncy = adjncy;
3699       }
3700       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3701       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3702       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3703 
3704       if (pcbddc->dbg_flag) {
3705         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3706         for (i=0;i<graph->ncc;i++) {
3707           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3708         }
3709       }
3710 
3711       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3712       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3713       for (i=0;i<pcis->n_B;i++) {
3714         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3715       }
3716       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3717 
3718       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3719       sum = 0;
3720       for (i=0;i<graph->ncc;i++) {
3721         PetscInt sizecc = 0;
3722         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3723           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3724             sizecc++;
3725           }
3726         }
3727         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3728           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3729             cxadj[graph->queue[j]] = sizecc;
3730           }
3731         }
3732         sum += sizecc*sizecc;
3733       }
3734       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3735       sum = 0;
3736       for (i=0;i<pcis->n;i++) {
3737         PetscInt temp = cxadj[i];
3738         cxadj[i] = sum;
3739         sum += temp;
3740       }
3741       cxadj[pcis->n] = sum;
3742       for (i=0;i<graph->ncc;i++) {
3743         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3744           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3745             PetscInt k,sizecc = 0;
3746             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3747               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3748                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3749                 sizecc++;
3750               }
3751             }
3752           }
3753         }
3754       }
3755       if (sum) {
3756         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3757       } else {
3758         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3759         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3760       }
3761       graph->xadj = 0;
3762       graph->adjncy = 0;
3763       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3764       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3765     }
3766   }
3767   if (pcbddc->dbg_flag) {
3768     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3769   }
3770 
3771   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3772   vertex_size = 1;
3773   if (pcbddc->user_provided_isfordofs) {
3774     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3775       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3776       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3777         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3778         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3779       }
3780       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3781       pcbddc->n_ISForDofs = 0;
3782       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3783     }
3784     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3785     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3786   } else {
3787     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3788       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3789       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3790       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3791         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3792       }
3793     }
3794   }
3795 
3796   /* Setup of Graph */
3797   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3798     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3799   }
3800   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3801     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3802   }
3803   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3804 
3805   /* attach info on disconnected subdomains if present */
3806   if (pcbddc->n_local_subs) {
3807     PetscInt *local_subs;
3808 
3809     ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr);
3810     for (i=0;i<pcbddc->n_local_subs;i++) {
3811       const PetscInt *idxs;
3812       PetscInt       nl,j;
3813 
3814       ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr);
3815       ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3816       for (j=0;j<nl;j++) {
3817         local_subs[idxs[j]] = i;
3818       }
3819       ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3820     }
3821     pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
3822     pcbddc->mat_graph->local_subs = local_subs;
3823   }
3824 
3825   /* Graph's connected components analysis */
3826   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3827 
3828   /* print some info to stdout */
3829   if (pcbddc->dbg_flag) {
3830     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3831   }
3832 
3833   /* mark topography has done */
3834   pcbddc->recompute_topography = PETSC_FALSE;
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3839 #undef __FUNCT__
3840 #define __FUNCT__ "PCBDDCSubsetNumbering"
3841 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3842 {
3843   PetscSF        sf;
3844   PetscLayout    map;
3845   const PetscInt *idxs;
3846   PetscInt       *leaf_data,*root_data,*gidxs;
3847   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3848   PetscInt       n_n,nlocals,start,first_index;
3849   PetscMPIInt    commsize;
3850   PetscBool      first_found;
3851   PetscErrorCode ierr;
3852 
3853   PetscFunctionBegin;
3854   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3855   if (subset_mult) {
3856     PetscCheckSameComm(subset,1,subset_mult,2);
3857     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3858     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3859   }
3860   /* create workspace layout for computing global indices of subset */
3861   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3862   lbounds[0] = lbounds[1] = 0;
3863   for (i=0;i<n;i++) {
3864     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3865     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3866   }
3867   lbounds[0] = -lbounds[0];
3868   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3869   gbounds[0] = -gbounds[0];
3870   N = gbounds[1] - gbounds[0] + 1;
3871   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3872   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3873   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3874   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3875   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3876 
3877   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3878   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3879   if (subset_mult) {
3880     const PetscInt* idxs_mult;
3881 
3882     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3883     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3884     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3885   } else {
3886     for (i=0;i<n;i++) leaf_data[i] = 1;
3887   }
3888   /* local size of new subset */
3889   n_n = 0;
3890   for (i=0;i<n;i++) n_n += leaf_data[i];
3891 
3892   /* global indexes in layout */
3893   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3894   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3895   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3896   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3897   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3898   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3899 
3900   /* reduce from leaves to roots */
3901   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3902   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3903   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3904 
3905   /* count indexes in local part of layout */
3906   nlocals = 0;
3907   first_index = -1;
3908   first_found = PETSC_FALSE;
3909   for (i=0;i<Nl;i++) {
3910     if (!first_found && root_data[i]) {
3911       first_found = PETSC_TRUE;
3912       first_index = i;
3913     }
3914     nlocals += root_data[i];
3915   }
3916 
3917   /* cumulative of number of indexes and size of subset without holes */
3918 #if defined(PETSC_HAVE_MPI_EXSCAN)
3919   start = 0;
3920   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3921 #else
3922   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3923   start = start-nlocals;
3924 #endif
3925 
3926   if (N_n) { /* compute total size of new subset if requested */
3927     *N_n = start + nlocals;
3928     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3929     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3930   }
3931 
3932   /* adapt root data with cumulative */
3933   if (first_found) {
3934     PetscInt old_index;
3935 
3936     root_data[first_index] += start;
3937     old_index = first_index;
3938     for (i=first_index+1;i<Nl;i++) {
3939       if (root_data[i]) {
3940         root_data[i] += root_data[old_index];
3941         old_index = i;
3942       }
3943     }
3944   }
3945 
3946   /* from roots to leaves */
3947   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3948   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3949   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3950 
3951   /* create new IS with global indexes without holes */
3952   if (subset_mult) {
3953     const PetscInt* idxs_mult;
3954     PetscInt        cum;
3955 
3956     cum = 0;
3957     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3958     for (i=0;i<n;i++) {
3959       PetscInt j;
3960       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3961     }
3962     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3963   } else {
3964     for (i=0;i<n;i++) {
3965       gidxs[i] = leaf_data[i]-1;
3966     }
3967   }
3968   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3969   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3970   PetscFunctionReturn(0);
3971 }
3972 
3973 #undef __FUNCT__
3974 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3975 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3976 {
3977   PetscInt       i,j;
3978   PetscScalar    *alphas;
3979   PetscErrorCode ierr;
3980 
3981   PetscFunctionBegin;
3982   /* this implements stabilized Gram-Schmidt */
3983   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3984   for (i=0;i<n;i++) {
3985     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3986     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3987     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3988   }
3989   ierr = PetscFree(alphas);CHKERRQ(ierr);
3990   PetscFunctionReturn(0);
3991 }
3992 
3993 #undef __FUNCT__
3994 #define __FUNCT__ "MatISGetSubassemblingPattern"
3995 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3996 {
3997   IS             ranks_send_to;
3998   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3999   PetscMPIInt    size,rank,color;
4000   PetscInt       *xadj,*adjncy;
4001   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
4002   PetscInt       i,local_size,threshold=0;
4003   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
4004   PetscSubcomm   subcomm;
4005   PetscErrorCode ierr;
4006 
4007   PetscFunctionBegin;
4008   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
4009   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
4010   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
4011 
4012   /* Get info on mapping */
4013   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
4014   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4015 
4016   /* build local CSR graph of subdomains' connectivity */
4017   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
4018   xadj[0] = 0;
4019   xadj[1] = PetscMax(n_neighs-1,0);
4020   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
4021   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
4022 
4023   if (threshold) {
4024     PetscInt xadj_count = 0;
4025     for (i=1;i<n_neighs;i++) {
4026       if (n_shared[i] > threshold) {
4027         adjncy[xadj_count] = neighs[i];
4028         adjncy_wgt[xadj_count] = n_shared[i];
4029         xadj_count++;
4030       }
4031     }
4032     xadj[1] = xadj_count;
4033   } else {
4034     if (xadj[1]) {
4035       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
4036       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
4037     }
4038   }
4039   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4040   if (use_square) {
4041     for (i=0;i<xadj[1];i++) {
4042       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
4043     }
4044   }
4045   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4046 
4047   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
4048 
4049   /*
4050     Restrict work on active processes only.
4051   */
4052   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
4053   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
4054   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
4055   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
4056   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4057   if (color) {
4058     ierr = PetscFree(xadj);CHKERRQ(ierr);
4059     ierr = PetscFree(adjncy);CHKERRQ(ierr);
4060     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4061   } else {
4062     Mat             subdomain_adj;
4063     IS              new_ranks,new_ranks_contig;
4064     MatPartitioning partitioner;
4065     PetscInt        prank,rstart=0,rend=0;
4066     PetscInt        *is_indices,*oldranks;
4067     PetscBool       aggregate;
4068 
4069     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
4070     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
4071     prank = rank;
4072     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
4073     /*
4074     for (i=0;i<size;i++) {
4075       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
4076     }
4077     */
4078     for (i=0;i<xadj[1];i++) {
4079       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
4080     }
4081     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4082     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
4083     if (aggregate) {
4084       PetscInt    lrows,row,ncols,*cols;
4085       PetscMPIInt nrank;
4086       PetscScalar *vals;
4087 
4088       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
4089       lrows = 0;
4090       if (nrank<redprocs) {
4091         lrows = size/redprocs;
4092         if (nrank<size%redprocs) lrows++;
4093       }
4094       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
4095       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
4096       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4097       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4098       row = nrank;
4099       ncols = xadj[1]-xadj[0];
4100       cols = adjncy;
4101       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
4102       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
4103       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4104       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4105       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4106       ierr = PetscFree(xadj);CHKERRQ(ierr);
4107       ierr = PetscFree(adjncy);CHKERRQ(ierr);
4108       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4109       ierr = PetscFree(vals);CHKERRQ(ierr);
4110     } else {
4111       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
4112     }
4113     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
4114 
4115     /* Partition */
4116     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
4117     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
4118     if (use_vwgt) {
4119       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
4120       v_wgt[0] = local_size;
4121       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
4122     }
4123     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
4124     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
4125     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
4126     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
4127     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
4128 
4129     /* renumber new_ranks to avoid "holes" in new set of processors */
4130     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
4131     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
4132     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4133     if (!redprocs) {
4134       ranks_send_to_idx[0] = oldranks[is_indices[0]];
4135     } else {
4136       PetscInt    idxs[1];
4137       PetscMPIInt tag;
4138       MPI_Request *reqs;
4139 
4140       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
4141       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
4142       for (i=rstart;i<rend;i++) {
4143         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
4144       }
4145       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
4146       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4147       ierr = PetscFree(reqs);CHKERRQ(ierr);
4148       ranks_send_to_idx[0] = oldranks[idxs[0]];
4149     }
4150     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4151     /* clean up */
4152     ierr = PetscFree(oldranks);CHKERRQ(ierr);
4153     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
4154     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
4155     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
4156   }
4157   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4158 
4159   /* assemble parallel IS for sends */
4160   i = 1;
4161   if (color) i=0;
4162   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
4163   /* get back IS */
4164   *is_sends = ranks_send_to;
4165   PetscFunctionReturn(0);
4166 }
4167 
4168 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
4169 
4170 #undef __FUNCT__
4171 #define __FUNCT__ "MatISSubassemble"
4172 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
4173 {
4174   Mat                    local_mat;
4175   IS                     is_sends_internal;
4176   PetscInt               rows,cols,new_local_rows;
4177   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
4178   PetscBool              ismatis,isdense,newisdense,destroy_mat;
4179   ISLocalToGlobalMapping l2gmap;
4180   PetscInt*              l2gmap_indices;
4181   const PetscInt*        is_indices;
4182   MatType                new_local_type;
4183   /* buffers */
4184   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
4185   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
4186   PetscInt               *recv_buffer_idxs_local;
4187   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
4188   /* MPI */
4189   MPI_Comm               comm,comm_n;
4190   PetscSubcomm           subcomm;
4191   PetscMPIInt            n_sends,n_recvs,commsize;
4192   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
4193   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
4194   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
4195   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
4196   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
4197   PetscErrorCode         ierr;
4198 
4199   PetscFunctionBegin;
4200   /* TODO: add missing checks */
4201   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
4202   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
4203   PetscValidLogicalCollectiveEnum(mat,reuse,5);
4204   PetscValidLogicalCollectiveInt(mat,nis,7);
4205   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
4206   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
4207   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4208   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
4209   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
4210   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
4211   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
4212   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
4213     PetscInt mrows,mcols,mnrows,mncols;
4214     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
4215     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
4216     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
4217     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
4218     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
4219     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
4220   }
4221   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
4222   PetscValidLogicalCollectiveInt(mat,bs,0);
4223   /* prepare IS for sending if not provided */
4224   if (!is_sends) {
4225     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
4226     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
4227   } else {
4228     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
4229     is_sends_internal = is_sends;
4230   }
4231 
4232   /* get comm */
4233   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
4234 
4235   /* compute number of sends */
4236   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
4237   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
4238 
4239   /* compute number of receives */
4240   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
4241   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
4242   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
4243   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4244   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
4245   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
4246   ierr = PetscFree(iflags);CHKERRQ(ierr);
4247 
4248   /* restrict comm if requested */
4249   subcomm = 0;
4250   destroy_mat = PETSC_FALSE;
4251   if (restrict_comm) {
4252     PetscMPIInt color,subcommsize;
4253 
4254     color = 0;
4255     if (restrict_full) {
4256       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
4257     } else {
4258       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
4259     }
4260     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
4261     subcommsize = commsize - subcommsize;
4262     /* check if reuse has been requested */
4263     if (reuse == MAT_REUSE_MATRIX) {
4264       if (*mat_n) {
4265         PetscMPIInt subcommsize2;
4266         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
4267         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
4268         comm_n = PetscObjectComm((PetscObject)*mat_n);
4269       } else {
4270         comm_n = PETSC_COMM_SELF;
4271       }
4272     } else { /* MAT_INITIAL_MATRIX */
4273       PetscMPIInt rank;
4274 
4275       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4276       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4277       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4278       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4279       comm_n = PetscSubcommChild(subcomm);
4280     }
4281     /* flag to destroy *mat_n if not significative */
4282     if (color) destroy_mat = PETSC_TRUE;
4283   } else {
4284     comm_n = comm;
4285   }
4286 
4287   /* prepare send/receive buffers */
4288   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4289   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4290   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4291   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4292   if (nis) {
4293     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4294   }
4295 
4296   /* Get data from local matrices */
4297   if (!isdense) {
4298     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4299     /* TODO: See below some guidelines on how to prepare the local buffers */
4300     /*
4301        send_buffer_vals should contain the raw values of the local matrix
4302        send_buffer_idxs should contain:
4303        - MatType_PRIVATE type
4304        - PetscInt        size_of_l2gmap
4305        - PetscInt        global_row_indices[size_of_l2gmap]
4306        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4307     */
4308   } else {
4309     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4310     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4311     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4312     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4313     send_buffer_idxs[1] = i;
4314     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4315     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4316     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4317     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4318     for (i=0;i<n_sends;i++) {
4319       ilengths_vals[is_indices[i]] = len*len;
4320       ilengths_idxs[is_indices[i]] = len+2;
4321     }
4322   }
4323   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4324   /* additional is (if any) */
4325   if (nis) {
4326     PetscMPIInt psum;
4327     PetscInt j;
4328     for (j=0,psum=0;j<nis;j++) {
4329       PetscInt plen;
4330       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4331       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4332       psum += len+1; /* indices + lenght */
4333     }
4334     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4335     for (j=0,psum=0;j<nis;j++) {
4336       PetscInt plen;
4337       const PetscInt *is_array_idxs;
4338       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4339       send_buffer_idxs_is[psum] = plen;
4340       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4341       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4342       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4343       psum += plen+1; /* indices + lenght */
4344     }
4345     for (i=0;i<n_sends;i++) {
4346       ilengths_idxs_is[is_indices[i]] = psum;
4347     }
4348     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4349   }
4350 
4351   buf_size_idxs = 0;
4352   buf_size_vals = 0;
4353   buf_size_idxs_is = 0;
4354   for (i=0;i<n_recvs;i++) {
4355     buf_size_idxs += (PetscInt)olengths_idxs[i];
4356     buf_size_vals += (PetscInt)olengths_vals[i];
4357     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4358   }
4359   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4360   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4361   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4362 
4363   /* get new tags for clean communications */
4364   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4365   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4366   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4367 
4368   /* allocate for requests */
4369   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4370   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4371   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4372   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4373   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4374   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4375 
4376   /* communications */
4377   ptr_idxs = recv_buffer_idxs;
4378   ptr_vals = recv_buffer_vals;
4379   ptr_idxs_is = recv_buffer_idxs_is;
4380   for (i=0;i<n_recvs;i++) {
4381     source_dest = onodes[i];
4382     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4383     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4384     ptr_idxs += olengths_idxs[i];
4385     ptr_vals += olengths_vals[i];
4386     if (nis) {
4387       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4388       ptr_idxs_is += olengths_idxs_is[i];
4389     }
4390   }
4391   for (i=0;i<n_sends;i++) {
4392     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4393     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4394     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4395     if (nis) {
4396       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4397     }
4398   }
4399   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4400   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4401 
4402   /* assemble new l2g map */
4403   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4404   ptr_idxs = recv_buffer_idxs;
4405   new_local_rows = 0;
4406   for (i=0;i<n_recvs;i++) {
4407     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4408     ptr_idxs += olengths_idxs[i];
4409   }
4410   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4411   ptr_idxs = recv_buffer_idxs;
4412   new_local_rows = 0;
4413   for (i=0;i<n_recvs;i++) {
4414     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4415     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4416     ptr_idxs += olengths_idxs[i];
4417   }
4418   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4419   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4420   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4421 
4422   /* infer new local matrix type from received local matrices type */
4423   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4424   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4425   if (n_recvs) {
4426     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4427     ptr_idxs = recv_buffer_idxs;
4428     for (i=0;i<n_recvs;i++) {
4429       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4430         new_local_type_private = MATAIJ_PRIVATE;
4431         break;
4432       }
4433       ptr_idxs += olengths_idxs[i];
4434     }
4435     switch (new_local_type_private) {
4436       case MATDENSE_PRIVATE:
4437         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4438           new_local_type = MATSEQAIJ;
4439           bs = 1;
4440         } else { /* if I receive only 1 dense matrix */
4441           new_local_type = MATSEQDENSE;
4442           bs = 1;
4443         }
4444         break;
4445       case MATAIJ_PRIVATE:
4446         new_local_type = MATSEQAIJ;
4447         bs = 1;
4448         break;
4449       case MATBAIJ_PRIVATE:
4450         new_local_type = MATSEQBAIJ;
4451         break;
4452       case MATSBAIJ_PRIVATE:
4453         new_local_type = MATSEQSBAIJ;
4454         break;
4455       default:
4456         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4457         break;
4458     }
4459   } else { /* by default, new_local_type is seqdense */
4460     new_local_type = MATSEQDENSE;
4461     bs = 1;
4462   }
4463 
4464   /* create MATIS object if needed */
4465   if (reuse == MAT_INITIAL_MATRIX) {
4466     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4467     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4468   } else {
4469     /* it also destroys the local matrices */
4470     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4471   }
4472   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4473   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4474 
4475   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4476 
4477   /* Global to local map of received indices */
4478   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4479   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4480   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4481 
4482   /* restore attributes -> type of incoming data and its size */
4483   buf_size_idxs = 0;
4484   for (i=0;i<n_recvs;i++) {
4485     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4486     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4487     buf_size_idxs += (PetscInt)olengths_idxs[i];
4488   }
4489   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4490 
4491   /* set preallocation */
4492   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4493   if (!newisdense) {
4494     PetscInt *new_local_nnz=0;
4495 
4496     ptr_vals = recv_buffer_vals;
4497     ptr_idxs = recv_buffer_idxs_local;
4498     if (n_recvs) {
4499       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4500     }
4501     for (i=0;i<n_recvs;i++) {
4502       PetscInt j;
4503       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4504         for (j=0;j<*(ptr_idxs+1);j++) {
4505           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4506         }
4507       } else {
4508         /* TODO */
4509       }
4510       ptr_idxs += olengths_idxs[i];
4511     }
4512     if (new_local_nnz) {
4513       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4514       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4515       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4516       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4517       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4518       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4519     } else {
4520       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4521     }
4522     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4523   } else {
4524     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4525   }
4526 
4527   /* set values */
4528   ptr_vals = recv_buffer_vals;
4529   ptr_idxs = recv_buffer_idxs_local;
4530   for (i=0;i<n_recvs;i++) {
4531     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4532       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4533       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4534       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4535       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4536       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4537     } else {
4538       /* TODO */
4539     }
4540     ptr_idxs += olengths_idxs[i];
4541     ptr_vals += olengths_vals[i];
4542   }
4543   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4544   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4545   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4546   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4547   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4548   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4549 
4550 #if 0
4551   if (!restrict_comm) { /* check */
4552     Vec       lvec,rvec;
4553     PetscReal infty_error;
4554 
4555     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4556     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4557     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4558     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4559     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4560     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4561     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4562     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4563     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4564   }
4565 #endif
4566 
4567   /* assemble new additional is (if any) */
4568   if (nis) {
4569     PetscInt **temp_idxs,*count_is,j,psum;
4570 
4571     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4572     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4573     ptr_idxs = recv_buffer_idxs_is;
4574     psum = 0;
4575     for (i=0;i<n_recvs;i++) {
4576       for (j=0;j<nis;j++) {
4577         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4578         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4579         psum += plen;
4580         ptr_idxs += plen+1; /* shift pointer to received data */
4581       }
4582     }
4583     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4584     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4585     for (i=1;i<nis;i++) {
4586       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4587     }
4588     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4589     ptr_idxs = recv_buffer_idxs_is;
4590     for (i=0;i<n_recvs;i++) {
4591       for (j=0;j<nis;j++) {
4592         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4593         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4594         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4595         ptr_idxs += plen+1; /* shift pointer to received data */
4596       }
4597     }
4598     for (i=0;i<nis;i++) {
4599       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4600       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4601       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4602     }
4603     ierr = PetscFree(count_is);CHKERRQ(ierr);
4604     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4605     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4606   }
4607   /* free workspace */
4608   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4609   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4610   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4611   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4612   if (isdense) {
4613     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4614     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4615   } else {
4616     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4617   }
4618   if (nis) {
4619     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4620     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4621   }
4622   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4623   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4624   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4625   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4626   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4627   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4628   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4629   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4630   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4631   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4632   ierr = PetscFree(onodes);CHKERRQ(ierr);
4633   if (nis) {
4634     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4635     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4636     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4637   }
4638   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4639   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4640     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4641     for (i=0;i<nis;i++) {
4642       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4643     }
4644     *mat_n = NULL;
4645   }
4646   PetscFunctionReturn(0);
4647 }
4648 
4649 /* temporary hack into ksp private data structure */
4650 #include <petsc/private/kspimpl.h>
4651 
4652 #undef __FUNCT__
4653 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4654 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4655 {
4656   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4657   PC_IS                  *pcis = (PC_IS*)pc->data;
4658   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4659   MatNullSpace           CoarseNullSpace=NULL;
4660   ISLocalToGlobalMapping coarse_islg;
4661   IS                     coarse_is,*isarray;
4662   PetscInt               i,im_active=-1,active_procs=-1;
4663   PetscInt               nis,nisdofs,nisneu;
4664   PC                     pc_temp;
4665   PCType                 coarse_pc_type;
4666   KSPType                coarse_ksp_type;
4667   PetscBool              multilevel_requested,multilevel_allowed;
4668   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4669   Mat                    t_coarse_mat_is;
4670   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4671   PetscMPIInt            all_procs;
4672   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4673   PetscBool              compute_vecs = PETSC_FALSE;
4674   PetscScalar            *array;
4675   PetscErrorCode         ierr;
4676 
4677   PetscFunctionBegin;
4678   /* Assign global numbering to coarse dofs */
4679   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4680     PetscInt ocoarse_size;
4681     compute_vecs = PETSC_TRUE;
4682     ocoarse_size = pcbddc->coarse_size;
4683     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4684     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4685     /* see if we can avoid some work */
4686     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4687       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4688       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4689         PC        pc;
4690         PetscBool isbddc;
4691 
4692         /* temporary workaround since PCBDDC does not have a reset method so far */
4693         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4694         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4695         if (isbddc) {
4696           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4697         }
4698         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4699         coarse_reuse = PETSC_FALSE;
4700       } else { /* we can safely reuse already computed coarse matrix */
4701         coarse_reuse = PETSC_TRUE;
4702       }
4703     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4704       coarse_reuse = PETSC_FALSE;
4705     }
4706     /* reset any subassembling information */
4707     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4708     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4709   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4710     coarse_reuse = PETSC_TRUE;
4711   }
4712 
4713   /* count "active" (i.e. with positive local size) and "void" processes */
4714   im_active = !!(pcis->n);
4715   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4716   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4717   void_procs = all_procs-active_procs;
4718   csin_type_simple = PETSC_TRUE;
4719   redist = PETSC_FALSE;
4720   if (pcbddc->current_level && void_procs) {
4721     csin_ml = PETSC_TRUE;
4722     ncoarse_ml = void_procs;
4723     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4724     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4725       csin_ds = PETSC_TRUE;
4726       ncoarse_ds = pcbddc->redistribute_coarse;
4727       redist = PETSC_TRUE;
4728     } else {
4729       csin_ds = PETSC_TRUE;
4730       ncoarse_ds = active_procs;
4731       redist = PETSC_TRUE;
4732     }
4733   } else {
4734     csin_ml = PETSC_FALSE;
4735     ncoarse_ml = all_procs;
4736     if (void_procs) {
4737       csin_ds = PETSC_TRUE;
4738       ncoarse_ds = void_procs;
4739       csin_type_simple = PETSC_FALSE;
4740     } else {
4741       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4742         csin_ds = PETSC_TRUE;
4743         ncoarse_ds = pcbddc->redistribute_coarse;
4744         redist = PETSC_TRUE;
4745       } else {
4746         csin_ds = PETSC_FALSE;
4747         ncoarse_ds = all_procs;
4748       }
4749     }
4750   }
4751 
4752   /*
4753     test if we can go multilevel: three conditions must be satisfied:
4754     - we have not exceeded the number of levels requested
4755     - we can actually subassemble the active processes
4756     - we can find a suitable number of MPI processes where we can place the subassembled problem
4757   */
4758   multilevel_allowed = PETSC_FALSE;
4759   multilevel_requested = PETSC_FALSE;
4760   if (pcbddc->current_level < pcbddc->max_levels) {
4761     multilevel_requested = PETSC_TRUE;
4762     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4763       multilevel_allowed = PETSC_FALSE;
4764     } else {
4765       multilevel_allowed = PETSC_TRUE;
4766     }
4767   }
4768   /* determine number of process partecipating to coarse solver */
4769   if (multilevel_allowed) {
4770     ncoarse = ncoarse_ml;
4771     csin = csin_ml;
4772     redist = PETSC_FALSE;
4773   } else {
4774     ncoarse = ncoarse_ds;
4775     csin = csin_ds;
4776   }
4777 
4778   /* creates temporary l2gmap and IS for coarse indexes */
4779   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4780   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4781 
4782   /* creates temporary MATIS object for coarse matrix */
4783   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4784   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4785   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4786   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4787 #if 0
4788   {
4789     PetscViewer viewer;
4790     char filename[256];
4791     sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4792     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4793     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4794     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4795     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4796   }
4797 #endif
4798   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4799   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4800   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4801   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4802   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4803 
4804   /* compute dofs splitting and neumann boundaries for coarse dofs */
4805   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4806     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4807     const PetscInt         *idxs;
4808     ISLocalToGlobalMapping tmap;
4809 
4810     /* create map between primal indices (in local representative ordering) and local primal numbering */
4811     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4812     /* allocate space for temporary storage */
4813     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4814     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4815     /* allocate for IS array */
4816     nisdofs = pcbddc->n_ISForDofsLocal;
4817     nisneu = !!pcbddc->NeumannBoundariesLocal;
4818     nis = nisdofs + nisneu;
4819     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4820     /* dofs splitting */
4821     for (i=0;i<nisdofs;i++) {
4822       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4823       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4824       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4825       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4826       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4827       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4828       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4829       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4830     }
4831     /* neumann boundaries */
4832     if (pcbddc->NeumannBoundariesLocal) {
4833       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4834       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4835       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4836       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4837       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4838       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4839       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4840       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4841     }
4842     /* free memory */
4843     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4844     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4845     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4846   } else {
4847     nis = 0;
4848     nisdofs = 0;
4849     nisneu = 0;
4850     isarray = NULL;
4851   }
4852   /* destroy no longer needed map */
4853   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4854 
4855   /* restrict on coarse candidates (if needed) */
4856   coarse_mat_is = NULL;
4857   if (csin) {
4858     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4859       if (redist) {
4860         PetscMPIInt rank;
4861         PetscInt    spc,n_spc_p1,dest[1],destsize;
4862 
4863         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4864         spc = active_procs/ncoarse;
4865         n_spc_p1 = active_procs%ncoarse;
4866         if (im_active) {
4867           destsize = 1;
4868           if (rank > n_spc_p1*(spc+1)-1) {
4869             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4870           } else {
4871             dest[0] = rank/(spc+1);
4872           }
4873         } else {
4874           destsize = 0;
4875         }
4876         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4877       } else if (csin_type_simple) {
4878         PetscMPIInt rank;
4879         PetscInt    issize,isidx;
4880 
4881         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4882         if (im_active) {
4883           issize = 1;
4884           isidx = (PetscInt)rank;
4885         } else {
4886           issize = 0;
4887           isidx = -1;
4888         }
4889         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4890       } else { /* get a suitable subassembling pattern from MATIS code */
4891         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4892       }
4893 
4894       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4895       if (!redist || ncoarse <= void_procs) {
4896         PetscInt ncoarse_cand,tissize,*nisindices;
4897         PetscInt *coarse_candidates;
4898         const PetscInt* tisindices;
4899 
4900         /* get coarse candidates' ranks in pc communicator */
4901         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4902         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4903         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4904           if (!coarse_candidates[i]) {
4905             coarse_candidates[ncoarse_cand++]=i;
4906           }
4907         }
4908         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4909 
4910 
4911         if (pcbddc->dbg_flag) {
4912           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4913           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4914           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4915           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4916           for (i=0;i<ncoarse_cand;i++) {
4917             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4918           }
4919           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4920           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4921         }
4922         /* shift the pattern on coarse candidates */
4923         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4924         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4925         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4926         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4927         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4928         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4929         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4930       }
4931       if (pcbddc->dbg_flag) {
4932         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4933         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4934         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4935         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4936       }
4937     }
4938     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4939     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4940       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4941     } else { /* this is the last level, so use just receiving processes in subcomm */
4942       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4943     }
4944   } else {
4945     if (pcbddc->dbg_flag) {
4946       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4947       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4948       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4949     }
4950     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4951     coarse_mat_is = t_coarse_mat_is;
4952   }
4953 
4954   /* create local to global scatters for coarse problem */
4955   if (compute_vecs) {
4956     PetscInt lrows;
4957     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4958     if (coarse_mat_is) {
4959       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4960     } else {
4961       lrows = 0;
4962     }
4963     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4964     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4965     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4966     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4967     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4968   }
4969   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4970   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4971 
4972   /* set defaults for coarse KSP and PC */
4973   if (multilevel_allowed) {
4974     coarse_ksp_type = KSPRICHARDSON;
4975     coarse_pc_type = PCBDDC;
4976   } else {
4977     coarse_ksp_type = KSPPREONLY;
4978     coarse_pc_type = PCREDUNDANT;
4979   }
4980 
4981   /* print some info if requested */
4982   if (pcbddc->dbg_flag) {
4983     if (!multilevel_allowed) {
4984       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4985       if (multilevel_requested) {
4986         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4987       } else if (pcbddc->max_levels) {
4988         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4989       }
4990       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4991     }
4992   }
4993 
4994   /* create the coarse KSP object only once with defaults */
4995   if (coarse_mat_is) {
4996     MatReuse coarse_mat_reuse;
4997     PetscViewer dbg_viewer = NULL;
4998     if (pcbddc->dbg_flag) {
4999       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
5000       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5001     }
5002     if (!pcbddc->coarse_ksp) {
5003       char prefix[256],str_level[16];
5004       size_t len;
5005       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
5006       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5007       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
5008       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
5009       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
5010       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
5011       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
5012       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5013       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5014       /* prefix */
5015       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
5016       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
5017       if (!pcbddc->current_level) {
5018         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
5019         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
5020       } else {
5021         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
5022         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5023         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5024         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
5025         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
5026         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
5027       }
5028       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
5029       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5030       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
5031       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5032       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
5033       /* allow user customization */
5034       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
5035     }
5036     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5037     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5038     if (nisdofs) {
5039       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
5040       for (i=0;i<nisdofs;i++) {
5041         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
5042       }
5043     }
5044     if (nisneu) {
5045       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
5046       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
5047     }
5048 
5049     /* get some info after set from options */
5050     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
5051     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
5052     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
5053     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
5054       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5055       isbddc = PETSC_FALSE;
5056     }
5057     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
5058     if (isredundant) {
5059       KSP inner_ksp;
5060       PC  inner_pc;
5061       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
5062       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
5063       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
5064     }
5065 
5066     /* assemble coarse matrix */
5067     if (coarse_reuse) {
5068       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
5069       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
5070       coarse_mat_reuse = MAT_REUSE_MATRIX;
5071     } else {
5072       coarse_mat_reuse = MAT_INITIAL_MATRIX;
5073     }
5074     if (isbddc || isnn) {
5075       if (pcbddc->coarsening_ratio > 1) {
5076         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
5077           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
5078           if (pcbddc->dbg_flag) {
5079             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5080             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
5081             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
5082             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5083           }
5084         }
5085         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
5086       } else {
5087         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
5088         coarse_mat = coarse_mat_is;
5089       }
5090     } else {
5091       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
5092     }
5093     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
5094 
5095     /* propagate symmetry info of coarse matrix */
5096     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
5097     if (pc->pmat->symmetric_set) {
5098       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
5099     }
5100     if (pc->pmat->hermitian_set) {
5101       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
5102     }
5103     if (pc->pmat->spd_set) {
5104       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
5105     }
5106     /* set operators */
5107     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5108     if (pcbddc->dbg_flag) {
5109       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5110     }
5111   } else { /* processes non partecipating to coarse solver (if any) */
5112     coarse_mat = 0;
5113   }
5114   ierr = PetscFree(isarray);CHKERRQ(ierr);
5115 #if 0
5116   {
5117     PetscViewer viewer;
5118     char filename[256];
5119     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
5120     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
5121     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
5122     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
5123     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
5124   }
5125 #endif
5126 
5127   /* Compute coarse null space (special handling by BDDC only) */
5128 #if 0
5129   if (pcbddc->NullSpace) {
5130     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
5131   }
5132 #endif
5133   /* hack */
5134   if (pcbddc->coarse_ksp) {
5135     Vec crhs,csol;
5136 
5137     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
5138     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
5139     if (!csol) {
5140       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
5141     }
5142     if (!crhs) {
5143       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
5144     }
5145   }
5146 
5147   /* compute null space for coarse solver if the benign trick has been requested */
5148   if (pcbddc->benign_null) {
5149 
5150     ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr);
5151     for (i=0;i<pcbddc->benign_n;i++) {
5152       ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr);
5153     }
5154     ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
5155     ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
5156     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5157     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5158     if (coarse_mat) {
5159       Vec         nullv;
5160       PetscScalar *array,*array2;
5161       PetscInt    nl;
5162 
5163       ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr);
5164       ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr);
5165       ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5166       ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr);
5167       ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr);
5168       ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr);
5169       ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5170       ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr);
5171       ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr);
5172       ierr = VecDestroy(&nullv);CHKERRQ(ierr);
5173     }
5174   }
5175 
5176   if (pcbddc->coarse_ksp) {
5177     PetscBool ispreonly;
5178 
5179     if (CoarseNullSpace) {
5180       PetscBool isnull;
5181       ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr);
5182       if (isnull) {
5183         if (isbddc) {
5184           ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
5185         } else {
5186           ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
5187         }
5188       } else {
5189         ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5190       }
5191     }
5192     /* setup coarse ksp */
5193     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
5194     /* Check coarse problem if in debug mode or if solving with an iterative method */
5195     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
5196     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
5197       KSP       check_ksp;
5198       KSPType   check_ksp_type;
5199       PC        check_pc;
5200       Vec       check_vec,coarse_vec;
5201       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
5202       PetscInt  its;
5203       PetscBool compute_eigs;
5204       PetscReal *eigs_r,*eigs_c;
5205       PetscInt  neigs;
5206       const char *prefix;
5207 
5208       /* Create ksp object suitable for estimation of extreme eigenvalues */
5209       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
5210       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5211       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5212       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
5213       if (ispreonly) {
5214         check_ksp_type = KSPPREONLY;
5215         compute_eigs = PETSC_FALSE;
5216       } else {
5217         check_ksp_type = KSPGMRES;
5218         compute_eigs = PETSC_TRUE;
5219       }
5220       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
5221       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
5222       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
5223       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
5224       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
5225       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
5226       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
5227       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
5228       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
5229       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
5230       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
5231       /* create random vec */
5232       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
5233       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
5234       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
5235       if (CoarseNullSpace) {
5236         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
5237       }
5238       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5239       /* solve coarse problem */
5240       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
5241       if (CoarseNullSpace) {
5242         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
5243       }
5244       /* set eigenvalue estimation if preonly has not been requested */
5245       if (compute_eigs) {
5246         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
5247         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
5248         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
5249         lambda_max = eigs_r[neigs-1];
5250         lambda_min = eigs_r[0];
5251         if (pcbddc->use_coarse_estimates) {
5252           if (lambda_max>lambda_min) {
5253             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
5254             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
5255           }
5256         }
5257       }
5258 
5259       /* check coarse problem residual error */
5260       if (pcbddc->dbg_flag) {
5261         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
5262         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5263         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
5264         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
5265         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5266         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
5267         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
5268         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
5269         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
5270         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
5271         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
5272         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
5273         if (CoarseNullSpace) {
5274           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr);
5275         }
5276         if (compute_eigs) {
5277           PetscReal lambda_max_s,lambda_min_s;
5278           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
5279           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
5280           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
5281           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
5282           for (i=0;i<neigs;i++) {
5283             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
5284           }
5285         }
5286         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5287         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5288       }
5289       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
5290       if (compute_eigs) {
5291         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
5292         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
5293       }
5294     }
5295   }
5296   /* print additional info */
5297   if (pcbddc->dbg_flag) {
5298     /* waits until all processes reaches this point */
5299     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
5300     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
5301     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5302   }
5303 
5304   /* free memory */
5305   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5306   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
5307   PetscFunctionReturn(0);
5308 }
5309 
5310 #undef __FUNCT__
5311 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
5312 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
5313 {
5314   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5315   PC_IS*         pcis = (PC_IS*)pc->data;
5316   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5317   IS             subset,subset_mult,subset_n;
5318   PetscInt       local_size,coarse_size=0;
5319   PetscInt       *local_primal_indices=NULL;
5320   const PetscInt *t_local_primal_indices;
5321   PetscErrorCode ierr;
5322 
5323   PetscFunctionBegin;
5324   /* Compute global number of coarse dofs */
5325   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5326     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5327   }
5328   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5329   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5330   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5331   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5332   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5333   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5334   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5335   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5336   if (local_size != pcbddc->local_primal_size) {
5337     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5338   }
5339   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5340   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5341   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5342   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5343   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5344 
5345   /* check numbering */
5346   if (pcbddc->dbg_flag) {
5347     PetscScalar coarsesum,*array,*array2;
5348     PetscInt    i;
5349     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5350 
5351     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5352     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5353     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5354     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
5355     /* counter */
5356     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5357     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5358     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5359     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5360     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5361     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5362 
5363     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5364     for (i=0;i<pcbddc->local_primal_size;i++) {
5365       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5366     }
5367     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5368     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5369     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5370     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5371     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5372     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5373     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5374     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5375     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5376     for (i=0;i<pcis->n;i++) {
5377       if (array[i] != 0.0 && array[i] != array2[i]) {
5378         PetscInt owned = (PetscInt)PetscRealPart(array[i]);
5379         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
5380         set_error = PETSC_TRUE;
5381         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5382       }
5383     }
5384     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5385     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5386     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5387     for (i=0;i<pcis->n;i++) {
5388       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5389     }
5390     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5391     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5392     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5393     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5394     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5395     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5396     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5397       PetscInt *gidxs;
5398 
5399       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5400       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5401       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5402       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5403       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5404       for (i=0;i<pcbddc->local_primal_size;i++) {
5405         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5406       }
5407       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5408       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5409     }
5410     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5411     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5412   }
5413   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5414   /* get back data */
5415   *coarse_size_n = coarse_size;
5416   *local_primal_indices_n = local_primal_indices;
5417   PetscFunctionReturn(0);
5418 }
5419 
5420 #undef __FUNCT__
5421 #define __FUNCT__ "PCBDDCGlobalToLocal"
5422 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5423 {
5424   IS             localis_t;
5425   PetscInt       i,lsize,*idxs,n;
5426   PetscScalar    *vals;
5427   PetscErrorCode ierr;
5428 
5429   PetscFunctionBegin;
5430   /* get indices in local ordering exploiting local to global map */
5431   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5432   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5433   for (i=0;i<lsize;i++) vals[i] = 1.0;
5434   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5435   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5436   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5437   if (idxs) { /* multilevel guard */
5438     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5439   }
5440   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5441   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5442   ierr = PetscFree(vals);CHKERRQ(ierr);
5443   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5444   /* now compute set in local ordering */
5445   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5446   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5447   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5448   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5449   for (i=0,lsize=0;i<n;i++) {
5450     if (PetscRealPart(vals[i]) > 0.5) {
5451       lsize++;
5452     }
5453   }
5454   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5455   for (i=0,lsize=0;i<n;i++) {
5456     if (PetscRealPart(vals[i]) > 0.5) {
5457       idxs[lsize++] = i;
5458     }
5459   }
5460   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5461   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5462   *localis = localis_t;
5463   PetscFunctionReturn(0);
5464 }
5465 
5466 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5467 #undef __FUNCT__
5468 #define __FUNCT__ "PCBDDCMatMult_Private"
5469 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5470 {
5471   PCBDDCChange_ctx change_ctx;
5472   PetscErrorCode   ierr;
5473 
5474   PetscFunctionBegin;
5475   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5476   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5477   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5478   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5479   PetscFunctionReturn(0);
5480 }
5481 
5482 #undef __FUNCT__
5483 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5484 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5485 {
5486   PCBDDCChange_ctx change_ctx;
5487   PetscErrorCode   ierr;
5488 
5489   PetscFunctionBegin;
5490   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5491   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5492   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5493   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5494   PetscFunctionReturn(0);
5495 }
5496 
5497 #undef __FUNCT__
5498 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5499 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5500 {
5501   PC_IS               *pcis=(PC_IS*)pc->data;
5502   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5503   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5504   Mat                 S_j;
5505   PetscInt            *used_xadj,*used_adjncy;
5506   PetscBool           free_used_adj;
5507   PetscErrorCode      ierr;
5508 
5509   PetscFunctionBegin;
5510   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5511   free_used_adj = PETSC_FALSE;
5512   if (pcbddc->sub_schurs_layers == -1) {
5513     used_xadj = NULL;
5514     used_adjncy = NULL;
5515   } else {
5516     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5517       used_xadj = pcbddc->mat_graph->xadj;
5518       used_adjncy = pcbddc->mat_graph->adjncy;
5519     } else if (pcbddc->computed_rowadj) {
5520       used_xadj = pcbddc->mat_graph->xadj;
5521       used_adjncy = pcbddc->mat_graph->adjncy;
5522     } else {
5523       PetscBool      flg_row=PETSC_FALSE;
5524       const PetscInt *xadj,*adjncy;
5525       PetscInt       nvtxs;
5526 
5527       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5528       if (flg_row) {
5529         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5530         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5531         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5532         free_used_adj = PETSC_TRUE;
5533       } else {
5534         pcbddc->sub_schurs_layers = -1;
5535         used_xadj = NULL;
5536         used_adjncy = NULL;
5537       }
5538       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5539     }
5540   }
5541 
5542   /* setup sub_schurs data */
5543   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5544   if (!sub_schurs->use_mumps) {
5545     /* pcbddc->ksp_D up to date only if not using MUMPS */
5546     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5547     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5548   } else {
5549     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5550     PetscBool isseqaij;
5551     if (!pcbddc->use_vertices && reuse_solvers) {
5552       PetscInt n_vertices;
5553 
5554       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5555       reuse_solvers = (PetscBool)!n_vertices;
5556     }
5557     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5558     if (!isseqaij) {
5559       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5560       if (matis->A == pcbddc->local_mat) {
5561         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5562         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5563       } else {
5564         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5565       }
5566     }
5567     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5568   }
5569   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5570 
5571   /* free adjacency */
5572   if (free_used_adj) {
5573     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5574   }
5575   PetscFunctionReturn(0);
5576 }
5577 
5578 #undef __FUNCT__
5579 #define __FUNCT__ "PCBDDCInitSubSchurs"
5580 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5581 {
5582   PC_IS               *pcis=(PC_IS*)pc->data;
5583   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5584   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5585   PCBDDCGraph         graph;
5586   PetscErrorCode      ierr;
5587 
5588   PetscFunctionBegin;
5589   /* attach interface graph for determining subsets */
5590   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5591     IS       verticesIS,verticescomm;
5592     PetscInt vsize,*idxs;
5593 
5594     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5595     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5596     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5597     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5598     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5599     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5600     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5601     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5602     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5603     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5604     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5605 /*
5606     if (pcbddc->dbg_flag) {
5607       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5608     }
5609 */
5610   } else {
5611     graph = pcbddc->mat_graph;
5612   }
5613 
5614   /* sub_schurs init */
5615   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5616 
5617   /* free graph struct */
5618   if (pcbddc->sub_schurs_rebuild) {
5619     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5620   }
5621   PetscFunctionReturn(0);
5622 }
5623 
5624 #undef __FUNCT__
5625 #define __FUNCT__ "PCBDDCCheckOperator"
5626 PetscErrorCode PCBDDCCheckOperator(PC pc)
5627 {
5628   PC_IS               *pcis=(PC_IS*)pc->data;
5629   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5630   PetscErrorCode      ierr;
5631 
5632   PetscFunctionBegin;
5633   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5634     IS             zerodiag = NULL;
5635     Mat            S_j,B0_B=NULL;
5636     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5637     PetscScalar    *p0_check,*array,*array2;
5638     PetscReal      norm;
5639     PetscInt       i;
5640 
5641     /* B0 and B0_B */
5642     if (zerodiag) {
5643       IS       dummy;
5644 
5645       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
5646       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5647       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5648       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5649     }
5650     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5651     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5652     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5653     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5654     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5655     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5656     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5657     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5658     /* S_j */
5659     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5660     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5661 
5662     /* mimic vector in \widetilde{W}_\Gamma */
5663     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5664     /* continuous in primal space */
5665     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5666     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5667     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5668     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5669     ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr);
5670     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
5671     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5672     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5673     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5674     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5675     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5676     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5677     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5678     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5679 
5680     /* assemble rhs for coarse problem */
5681     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5682     /* local with Schur */
5683     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5684     if (zerodiag) {
5685       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5686       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
5687       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5688       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5689     }
5690     /* sum on primal nodes the local contributions */
5691     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5692     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5693     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5694     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5695     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5696     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5697     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5698     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5699     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5700     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5701     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5702     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5703     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5704     /* scale primal nodes (BDDC sums contibutions) */
5705     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5706     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5707     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5708     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5709     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5710     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5711     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5712     /* global: \widetilde{B0}_B w_\Gamma */
5713     if (zerodiag) {
5714       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5715       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5716       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
5717       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5718     }
5719     /* BDDC */
5720     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5721     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5722 
5723     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5724     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5725     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5726     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5727     for (i=0;i<pcbddc->benign_n;i++) {
5728       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
5729     }
5730     ierr = PetscFree(p0_check);CHKERRQ(ierr);
5731     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5732     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5733     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5734     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5735     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5736   }
5737   PetscFunctionReturn(0);
5738 }
5739