xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 4f1b2e486e300201af44616c8751d8bf1ae87a02)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatDetectDisconnectedComponents"
10 PetscErrorCode MatDetectDisconnectedComponents(Mat A, PetscBool filter, PetscInt *ncc, IS* cc[])
11 {
12   Mat                    B;
13   IS                     is_dummy,*cc_n;
14   ISLocalToGlobalMapping l2gmap_dummy;
15   PCBDDCGraph            graph;
16   PetscInt               i,n;
17   PetscInt               *xadj,*adjncy;
18   PetscInt               *xadj_filtered,*adjncy_filtered;
19   PetscBool              flg_row,isseqaij;
20   PetscErrorCode         ierr;
21 
22   PetscFunctionBegin;
23   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
24   if (!isseqaij && filter) {
25     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
26   } else {
27     B = A;
28   }
29   ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
30 
31   /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
32   if (filter) {
33     PetscScalar *data;
34     PetscInt    j,cum;
35 
36     ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr);
37     ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr);
38     cum = 0;
39     for (i=0;i<n;i++) {
40       PetscInt t;
41 
42       for (j=xadj[i];j<xadj[i+1];j++) {
43         if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
44           continue;
45         }
46         adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
47       }
48       t = xadj_filtered[i];
49       xadj_filtered[i] = cum;
50       cum += t;
51     }
52     ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr);
53   } else {
54     xadj_filtered = NULL;
55     adjncy_filtered = NULL;
56   }
57 
58   /* compute local connected components using PCBDDCGraph */
59   ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr);
60   ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
61   ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
62   ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
63   ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n);CHKERRQ(ierr);
64   ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
65   if (xadj_filtered) {
66     graph->xadj = xadj_filtered;
67     graph->adjncy = adjncy_filtered;
68   } else {
69     graph->xadj = xadj;
70     graph->adjncy = adjncy;
71   }
72   ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
73   ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
74 
75   /* partial clean up */
76   ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr);
77   ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
78   if (!isseqaij && filter) {
79     ierr = MatDestroy(&B);CHKERRQ(ierr);
80   }
81 
82   /* get back data */
83   *ncc = graph->ncc;
84   ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr);
85   for (i=0;i<graph->ncc;i++) {
86     ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr);
87   }
88   *cc = cc_n;
89 
90   /* clean up graph */
91   graph->xadj = 0;
92   graph->adjncy = 0;
93   ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 
97 #undef __FUNCT__
98 #define __FUNCT__ "PCBDDCBenignCheck"
99 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
100 {
101   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
102   PC_IS*         pcis = (PC_IS*)(pc->data);
103   IS             dirIS = NULL;
104   PetscInt       i;
105   PetscErrorCode ierr;
106 
107   PetscFunctionBegin;
108   ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
109   if (zerodiag) {
110     Mat            A;
111     Vec            vec3_N;
112     PetscScalar    *vals;
113     const PetscInt *idxs;
114     PetscInt       nz;
115 
116     /* p0 */
117     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
118     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
119     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
120     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
121     for (i=0;i<nz;i++) vals[i] = 1.;
122     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
123     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
124     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
125     /* v_I */
126     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
127     for (i=0;i<nz;i++) vals[i] = 0.;
128     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
129     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
130     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
131     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
132     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
133     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
134     if (dirIS) {
135       PetscInt n;
136 
137       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
138       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
139       for (i=0;i<n;i++) vals[i] = 0.;
140       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
141       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
142     }
143     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
144     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
145     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
146     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
147     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
148     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
149     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
150     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) {
151       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
152     }
153     ierr = PetscFree(vals);CHKERRQ(ierr);
154     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
155   }
156   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
157 
158   /* check PCBDDCBenignGetOrSetP0 */
159   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
160   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
161   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
162   for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
163   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
164   for (i=0;i<pcbddc->benign_n;i++) {
165     if ((PetscInt)PetscRealPart(pcbddc->benign_p0[i]) != -PetscGlobalRank-i) {
166       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e at %d instead of %1.4e\n",pcbddc->benign_p0[i],i,-PetscGlobalRank-i);CHKERRQ(ierr);
167     }
168   }
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
174 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
175 {
176   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
177   IS             pressures,zerodiag,*zerodiag_subs;
178   PetscInt       nz,n;
179   PetscBool      sorted,have_null,has_null_pressures,recompute_zerodiag;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
184   ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr);
185   pcbddc->benign_original_mat = pcbddc->local_mat;
186   /* if a local info on dofs is present, assumes the last field is represented by "pressures"
187      otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not)
188      Checks if all the pressure dofs in each subdomain have a zero diagonal
189      If not, a change of basis on pressures is not needed
190      since the local Schur complements are SPD
191   */
192   has_null_pressures = PETSC_TRUE;
193   have_null = PETSC_TRUE;
194   if (pcbddc->n_ISForDofsLocal) {
195     PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1;
196 
197     /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */
198     ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr);
199     ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
200     ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr);
201     ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr);
202     ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr);
203     if (!sorted) {
204       ierr = ISSort(pressures);CHKERRQ(ierr);
205     }
206   } else {
207     pressures = NULL;
208   }
209   ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr);
210   /* TODO: add check for shared dofs and raise error */
211   ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr);
212   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
213   if (!sorted) {
214     ierr = ISSort(zerodiag);CHKERRQ(ierr);
215   }
216   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
217   if (!nz) {
218     if (n) have_null = PETSC_FALSE;
219     has_null_pressures = PETSC_FALSE;
220     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
221   }
222 
223   recompute_zerodiag = PETSC_FALSE;
224   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
225   zerodiag_subs = NULL;
226   pcbddc->benign_n = 0;
227   if (has_null_pressures) {
228     IS       *subs;
229     PetscInt nsubs,i;
230 
231     subs = pcbddc->local_subs;
232     nsubs = pcbddc->n_local_subs;
233     if (nsubs > 1) {
234       ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr);
235       for (i=0;i<nsubs;i++) {
236         ISLocalToGlobalMapping l2g;
237         IS                     t_zerodiag_subs;
238         PetscInt               nl;
239 
240         ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr);
241         ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr);
242         ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr);
243         if (nl) {
244           PetscBool valid = PETSC_TRUE;
245 
246           if (pressures) {
247             IS t_pressure_subs;
248             ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr);
249             ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr);
250             ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr);
251           }
252           if (valid) {
253             ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr);
254             pcbddc->benign_n++;
255           } else {
256             recompute_zerodiag = PETSC_TRUE;
257           }
258         }
259         ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr);
260         ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr);
261       }
262     } else { /* there's just one subdomain (or zero if they have not been detected */
263       PetscBool valid = PETSC_TRUE;
264       if (pressures) {
265         ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr);
266       }
267       if (valid) {
268         pcbddc->benign_n = 1;
269         ierr = PetscCalloc1(1,&zerodiag_subs);CHKERRQ(ierr);
270         ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr);
271         zerodiag_subs[0] = zerodiag;
272       }
273     }
274   }
275 
276   if (!pcbddc->benign_n) {
277     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
278     recompute_zerodiag = PETSC_FALSE;
279     has_null_pressures = PETSC_FALSE;
280     have_null = PETSC_FALSE;
281   }
282 
283   /* final check for null pressures */
284   if (zerodiag && pressures) {
285     PetscInt nz,np;
286     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
287     ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr);
288     if (nz != np) have_null = PETSC_FALSE;
289   }
290 
291   if (recompute_zerodiag) {
292     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
293     if (pcbddc->benign_n == 1) {
294       ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr);
295       zerodiag = zerodiag_subs[0];
296     } else {
297       PetscInt i,nzn,*new_idxs;
298 
299       nzn = 0;
300       for (i=0;i<pcbddc->benign_n;i++) {
301         PetscInt ns;
302         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
303         nzn += ns;
304       }
305       ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr);
306       nzn = 0;
307       for (i=0;i<pcbddc->benign_n;i++) {
308         PetscInt ns,*idxs;
309         ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr);
310         ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
311         ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr);
312         ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr);
313         nzn += ns;
314       }
315       ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr);
316       ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr);
317     }
318     have_null = PETSC_FALSE;
319   }
320 
321   if (has_null_pressures) {
322     IS             zerodiagc;
323     const PetscInt *idxs,*idxsc;
324     PetscInt       i,s,*nnz;
325 
326     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
327     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
328     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
329     /* local change of basis for pressures */
330     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
331     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr);
332     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
333     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
334     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
335     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */
336     for (i=0;i<pcbddc->benign_n;i++) {
337       PetscInt nzs,j;
338 
339       ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr);
340       ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
341       for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
342       nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
343       ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr);
344     }
345     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
346     ierr = PetscFree(nnz);CHKERRQ(ierr);
347     /* set identity on velocities */
348     for (i=0;i<n-nz;i++) {
349       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
350     }
351     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
352     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
353     ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr);
354     /* set change on pressures */
355     for (s=0;s<pcbddc->benign_n;s++) {
356       PetscScalar *array;
357       PetscInt    nzs;
358 
359       ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr);
360       ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
361       for (i=0;i<nzs-1;i++) {
362         PetscScalar vals[2];
363         PetscInt    cols[2];
364 
365         cols[0] = idxs[i];
366         cols[1] = idxs[nzs-1];
367         vals[0] = 1.;
368         vals[1] = 1.;
369         ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
370       }
371       ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr);
372       for (i=0;i<nzs-1;i++) array[i] = -1.;
373       array[nzs-1] = 1.;
374       ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
375       /* store local idxs for p0 */
376       pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
377       ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr);
378       ierr = ISDestroy(&zerodiag_subs[s]);CHKERRQ(ierr);
379       ierr = PetscFree(array);CHKERRQ(ierr);
380     }
381     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
382     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
383     /* TODO: need optimization? */
384     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
385     ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
386     /* store global idxs for p0 */
387     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
388   }
389   ierr = PetscFree(zerodiag_subs);CHKERRQ(ierr);
390   ierr = ISDestroy(&pressures);CHKERRQ(ierr);
391 
392   /* determines if the coarse solver will be singular or not */
393   ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
394   *zerodiaglocal = zerodiag;
395   PetscFunctionReturn(0);
396 }
397 
398 #undef __FUNCT__
399 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
400 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
401 {
402   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
403   PetscErrorCode ierr;
404 
405   PetscFunctionBegin;
406   if (!pcbddc->benign_sf) {
407     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
408     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr);
409   }
410   if (get) { /* use SF to get values */
411     PetscScalar *array;
412 
413     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
414     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
415     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr);
416     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
417   } else { /* use VecSetValues (not scalable, I should try to find a better solution (defining a new MPI_OP for reduction) */
418     ierr = VecSetValues(v,pcbddc->benign_n,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
419     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
420     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
421   }
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
427 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
428 {
429   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
430   PetscErrorCode ierr;
431 
432   PetscFunctionBegin;
433   /* TODO: add error checking
434     - avoid nested pop (or push) calls.
435     - cannot push before pop.
436     - cannot call this if pcbddc->local_mat is NULL
437   */
438   if (!pcbddc->benign_n) {
439     PetscFunctionReturn(0);
440   }
441   if (pop) {
442     IS       is_p0;
443     MatReuse reuse;
444 
445     /* extract B_0 */
446     reuse = MAT_INITIAL_MATRIX;
447     if (pcbddc->benign_B0) {
448       reuse = MAT_REUSE_MATRIX;
449     }
450     ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr);
451     ierr = MatGetSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr);
452     /* remove rows and cols from local problem */
453     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
454     ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr);
455     ierr = ISDestroy(&is_p0);CHKERRQ(ierr);
456   } else { /* push */
457     PetscInt i;
458 
459     for (i=0;i<pcbddc->benign_n;i++) {
460       PetscScalar *B0_vals;
461       PetscInt    *B0_cols,B0_ncol;
462 
463       ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
464       ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
465       ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+1,B0_vals,INSERT_VALUES);CHKERRQ(ierr);
466       ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr);
467       ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr);
468     }
469     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
470     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
471   }
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "PCBDDCAdaptiveSelection"
477 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
478 {
479   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
480   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
481   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
482   PetscBLASInt    *B_iwork,*B_ifail;
483   PetscScalar     *work,lwork;
484   PetscScalar     *St,*S,*eigv;
485   PetscScalar     *Sarray,*Starray;
486   PetscReal       *eigs,thresh;
487   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
488   PetscBool       allocated_S_St;
489 #if defined(PETSC_USE_COMPLEX)
490   PetscReal       *rwork;
491 #endif
492   PetscErrorCode  ierr;
493 
494   PetscFunctionBegin;
495   if (!sub_schurs->use_mumps) {
496     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
497   }
498 
499   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
500     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
501   }
502 
503   if (pcbddc->dbg_flag) {
504     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
505     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
506     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
507     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
508   }
509 
510   if (pcbddc->dbg_flag) {
511     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
512   }
513 
514   /* max size of subsets */
515   mss = 0;
516   for (i=0;i<sub_schurs->n_subs;i++) {
517     PetscInt subset_size;
518 
519     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
520     mss = PetscMax(mss,subset_size);
521   }
522 
523   /* min/max and threshold */
524   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
525   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
526   nmax = PetscMax(nmin,nmax);
527   allocated_S_St = PETSC_FALSE;
528   if (nmin) {
529     allocated_S_St = PETSC_TRUE;
530   }
531 
532   /* allocate lapack workspace */
533   cum = cum2 = 0;
534   maxneigs = 0;
535   for (i=0;i<sub_schurs->n_subs;i++) {
536     PetscInt n,subset_size;
537 
538     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
539     n = PetscMin(subset_size,nmax);
540     cum += subset_size;
541     cum2 += subset_size*n;
542     maxneigs = PetscMax(maxneigs,n);
543   }
544   if (mss) {
545     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
546       PetscBLASInt B_itype = 1;
547       PetscBLASInt B_N = mss;
548       PetscReal    zero = 0.0;
549       PetscReal    eps = 0.0; /* dlamch? */
550 
551       B_lwork = -1;
552       S = NULL;
553       St = NULL;
554       eigs = NULL;
555       eigv = NULL;
556       B_iwork = NULL;
557       B_ifail = NULL;
558 #if defined(PETSC_USE_COMPLEX)
559       rwork = NULL;
560 #endif
561       thresh = 1.0;
562       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
563 #if defined(PETSC_USE_COMPLEX)
564       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
565 #else
566       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
567 #endif
568       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
569       ierr = PetscFPTrapPop();CHKERRQ(ierr);
570     } else {
571         /* TODO */
572     }
573   } else {
574     lwork = 0;
575   }
576 
577   nv = 0;
578   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
579     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
580   }
581   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
582   if (allocated_S_St) {
583     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
584   }
585   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
586 #if defined(PETSC_USE_COMPLEX)
587   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
588 #endif
589   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
590                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
591                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
592                       nv+cum,&pcbddc->adaptive_constraints_idxs,
593                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
594   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
595 
596   maxneigs = 0;
597   cum = cum2 = cumarray = 0;
598   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
599   pcbddc->adaptive_constraints_data_ptr[0] = 0;
600   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
601     const PetscInt *idxs;
602 
603     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
604     for (cum=0;cum<nv;cum++) {
605       pcbddc->adaptive_constraints_n[cum] = 1;
606       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
607       pcbddc->adaptive_constraints_data[cum] = 1.0;
608       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
609       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
610     }
611     cum2 = cum;
612     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
613   }
614 
615   if (mss) { /* multilevel */
616     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
617     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
618   }
619 
620   for (i=0;i<sub_schurs->n_subs;i++) {
621 
622     const PetscInt *idxs;
623     PetscReal      infty = PETSC_MAX_REAL;
624     PetscInt       j,subset_size,eigs_start = 0;
625     PetscBLASInt   B_N;
626     PetscBool      same_data = PETSC_FALSE;
627 
628     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
629     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
630     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
631       if (sub_schurs->is_hermitian) {
632         PetscInt j,k;
633         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
634           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
635           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
636         }
637         for (j=0;j<subset_size;j++) {
638           for (k=j;k<subset_size;k++) {
639             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
640             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
641           }
642         }
643       } else {
644         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
645         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
646       }
647     } else {
648       S = Sarray + cumarray;
649       St = Starray + cumarray;
650     }
651 
652     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
653     /* see if we can save some work */
654     if (sub_schurs->n_subs == 1) {
655       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
656     }
657 
658     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
659       B_neigs = 0;
660     } else {
661       /* Threshold: this is an heuristic for edges */
662       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
663 
664       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
665         PetscBLASInt B_itype = 1;
666         PetscBLASInt B_IL, B_IU;
667         PetscReal    eps = -1.0; /* dlamch? */
668         PetscInt     nmin_s;
669 
670         if (pcbddc->dbg_flag) {
671           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
672         }
673 
674         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
675         if (thresh > 1.+PETSC_SMALL) {
676 
677           /* ask for eigenvalues larger than thresh */
678 #if defined(PETSC_USE_COMPLEX)
679           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
680 #else
681           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
682 #endif
683         } else {
684           B_IU = PetscMax(1,PetscMin(B_N,nmax));
685           B_IL = 1;
686 #if defined(PETSC_USE_COMPLEX)
687           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
688 #else
689           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
690 #endif
691         }
692         ierr = PetscFPTrapPop();CHKERRQ(ierr);
693         if (B_ierr) {
694           if (B_ierr < 0 ) {
695             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
696           } else if (B_ierr <= B_N) {
697             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
698           } else {
699             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
700           }
701         }
702 
703         if (B_neigs > nmax) {
704           if (pcbddc->dbg_flag) {
705             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
706           }
707           eigs_start = B_neigs -nmax;
708           B_neigs = nmax;
709         }
710 
711         nmin_s = PetscMin(nmin,B_N);
712         if (B_neigs < nmin_s) {
713           PetscBLASInt B_neigs2;
714 
715           B_IU = B_N - B_neigs;
716           B_IL = B_N - nmin_s + 1;
717           if (pcbddc->dbg_flag) {
718             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
719           }
720           if (sub_schurs->is_hermitian) {
721             PetscInt j;
722             for (j=0;j<subset_size;j++) {
723               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
724             }
725             for (j=0;j<subset_size;j++) {
726               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
727             }
728           } else {
729             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
730             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
731           }
732           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
733 #if defined(PETSC_USE_COMPLEX)
734           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
735 #else
736           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
737 #endif
738           ierr = PetscFPTrapPop();CHKERRQ(ierr);
739           B_neigs += B_neigs2;
740         }
741         if (B_ierr) {
742           if (B_ierr < 0 ) {
743             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
744           } else if (B_ierr <= B_N) {
745             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
746           } else {
747             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
748           }
749         }
750         if (pcbddc->dbg_flag) {
751           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
752           for (j=0;j<B_neigs;j++) {
753             if (eigs[j] == 0.0) {
754               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
755             } else {
756               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
757             }
758           }
759         }
760       } else {
761           /* TODO */
762       }
763     }
764     maxneigs = PetscMax(B_neigs,maxneigs);
765     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
766     if (B_neigs) {
767       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
768 
769       if (pcbddc->dbg_flag > 1) {
770         PetscInt ii;
771         for (ii=0;ii<B_neigs;ii++) {
772           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
773           for (j=0;j<B_N;j++) {
774 #if defined(PETSC_USE_COMPLEX)
775             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
776             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
777             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
778 #else
779             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
780 #endif
781           }
782         }
783       }
784 #if 0
785       for (j=0;j<B_neigs;j++) {
786         PetscBLASInt Blas_N,Blas_one = 1.0;
787         PetscScalar norm;
788         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
789         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
790                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
791         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
792           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
793         } else {
794           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
795         }
796         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
797       }
798 #endif
799       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
800       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
801       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
802       cum++;
803     }
804     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
805     /* shift for next computation */
806     cumarray += subset_size*subset_size;
807   }
808   if (pcbddc->dbg_flag) {
809     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
810   }
811 
812   if (mss) {
813     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
814     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
815     /* destroy matrices (junk) */
816     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
817     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
818   }
819   if (allocated_S_St) {
820     ierr = PetscFree2(S,St);CHKERRQ(ierr);
821   }
822   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
823 #if defined(PETSC_USE_COMPLEX)
824   ierr = PetscFree(rwork);CHKERRQ(ierr);
825 #endif
826   if (pcbddc->dbg_flag) {
827     PetscInt maxneigs_r;
828     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
829     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
830   }
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "PCBDDCSetUpSolvers"
836 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
837 {
838   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
839   PetscScalar    *coarse_submat_vals;
840   PetscErrorCode ierr;
841 
842   PetscFunctionBegin;
843   /* Setup local scatters R_to_B and (optionally) R_to_D */
844   /* PCBDDCSetUpLocalWorkVectors should be called first! */
845   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
846 
847   /* Setup local neumann solver ksp_R */
848   /* PCBDDCSetUpLocalScatters should be called first! */
849   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
850 
851   /* Change global null space passed in by the user if change of basis has been requested */
852   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
853     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
854   }
855 
856   /*
857      Setup local correction and local part of coarse basis.
858      Gives back the dense local part of the coarse matrix in column major ordering
859   */
860   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
861 
862   /* Compute total number of coarse nodes and setup coarse solver */
863   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
864 
865   /* free */
866   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "PCBDDCResetCustomization"
872 PetscErrorCode PCBDDCResetCustomization(PC pc)
873 {
874   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
875   PetscErrorCode ierr;
876 
877   PetscFunctionBegin;
878   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
879   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
880   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
881   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
882   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
883   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
884   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
885   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
886   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
887   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
888   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "PCBDDCResetTopography"
894 PetscErrorCode PCBDDCResetTopography(PC pc)
895 {
896   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
897   PetscInt       i;
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
902   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
903   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
904   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
905   for (i=0;i<pcbddc->n_local_subs;i++) {
906     ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr);
907   }
908   ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr);
909   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "PCBDDCResetSolvers"
915 PetscErrorCode PCBDDCResetSolvers(PC pc)
916 {
917   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
918   PetscScalar    *array;
919   PetscErrorCode ierr;
920 
921   PetscFunctionBegin;
922   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
923   if (pcbddc->coarse_phi_B) {
924     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
925     ierr = PetscFree(array);CHKERRQ(ierr);
926   }
927   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
928   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
929   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
930   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
931   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
932   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
933   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
934   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
935   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
936   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
937   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
938   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
939   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
940   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
941   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
942   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
943   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
944   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
945   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
946   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
947   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
948   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
949   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
950   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
951   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
952   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
953   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
954   ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr);
955   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
956   ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
962 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
963 {
964   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
965   PC_IS          *pcis = (PC_IS*)pc->data;
966   VecType        impVecType;
967   PetscInt       n_constraints,n_R,old_size;
968   PetscErrorCode ierr;
969 
970   PetscFunctionBegin;
971   if (!pcbddc->ConstraintMatrix) {
972     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
973   }
974   /* get sizes */
975   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
976   n_R = pcis->n - pcbddc->n_vertices;
977   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
978   /* local work vectors (try to avoid unneeded work)*/
979   /* R nodes */
980   old_size = -1;
981   if (pcbddc->vec1_R) {
982     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
983   }
984   if (n_R != old_size) {
985     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
986     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
987     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
988     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
989     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
990     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
991   }
992   /* local primal dofs */
993   old_size = -1;
994   if (pcbddc->vec1_P) {
995     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
996   }
997   if (pcbddc->local_primal_size != old_size) {
998     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
999     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
1000     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
1001     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
1002   }
1003   /* local explicit constraints */
1004   old_size = -1;
1005   if (pcbddc->vec1_C) {
1006     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
1007   }
1008   if (n_constraints && n_constraints != old_size) {
1009     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
1010     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
1011     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
1012     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "PCBDDCSetUpCorrection"
1019 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
1020 {
1021   PetscErrorCode  ierr;
1022   /* pointers to pcis and pcbddc */
1023   PC_IS*          pcis = (PC_IS*)pc->data;
1024   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1025   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1026   /* submatrices of local problem */
1027   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
1028   /* submatrices of benign trick */
1029   Mat             B0_V = NULL;
1030   /* submatrices of local coarse problem */
1031   Mat             S_VV,S_CV,S_VC,S_CC;
1032   /* working matrices */
1033   Mat             C_CR;
1034   /* additional working stuff */
1035   PC              pc_R;
1036   Mat             F;
1037   PetscBool       isLU,isCHOL,isILU;
1038 
1039   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
1040   PetscScalar     *work;
1041   PetscInt        *idx_V_B;
1042   PetscInt        n,n_vertices,n_constraints,*p0_lidx_I;
1043   PetscInt        i,n_R,n_D,n_B;
1044   PetscBool       unsymmetric_check;
1045   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
1046   MatType         impMatType;
1047   /* some shortcuts to scalars */
1048   PetscScalar     one=1.0,m_one=-1.0;
1049 
1050   PetscFunctionBegin;
1051   n_vertices = pcbddc->n_vertices;
1052   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
1053   /* Set Non-overlapping dimensions */
1054   n_B = pcis->n_B;
1055   n_D = pcis->n - n_B;
1056   n_R = pcis->n - n_vertices;
1057 
1058   /* Set types for local objects needed by BDDC precondtioner */
1059   impMatType = MATSEQDENSE;
1060 
1061   /* vertices in boundary numbering */
1062   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
1063   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
1064   if (i != n_vertices) {
1065     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
1066   }
1067 
1068   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
1069   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
1070   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
1071   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
1072   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
1073   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
1074   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
1075   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
1076   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
1077   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
1078 
1079   unsymmetric_check = PETSC_FALSE;
1080   /* allocate workspace */
1081   n = 0;
1082   if (n_constraints) {
1083     n += n_R*n_constraints;
1084   }
1085   if (n_vertices) {
1086     n = PetscMax(2*n_R*n_vertices,n);
1087     n = PetscMax((n_R+n_B)*n_vertices,n);
1088   }
1089   if (!pcbddc->symmetric_primal) {
1090     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
1091     unsymmetric_check = PETSC_TRUE;
1092   }
1093   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1094 
1095   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
1096   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
1097   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
1098   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
1099   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
1100   if (isLU || isILU || isCHOL) {
1101     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
1102   } else if (sub_schurs->reuse_mumps) {
1103     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1104     MatFactorType type;
1105 
1106     F = reuse_mumps->F;
1107     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
1108     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
1109   } else {
1110     F = NULL;
1111   }
1112 
1113   /* Precompute stuffs needed for preprocessing and application of BDDC*/
1114   if (n_constraints) {
1115     Mat         M1,M2,M3;
1116     Mat         auxmat;
1117     IS          is_aux;
1118     PetscScalar *array,*array2;
1119 
1120     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
1121     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
1122 
1123     /* Extract constraints on R nodes: C_{CR}  */
1124     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
1125     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
1126     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
1127 
1128     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
1129     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
1130     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1131     for (i=0;i<n_constraints;i++) {
1132       const PetscScalar *row_cmat_values;
1133       const PetscInt    *row_cmat_indices;
1134       PetscInt          size_of_constraint,j;
1135 
1136       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1137       for (j=0;j<size_of_constraint;j++) {
1138         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
1139       }
1140       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
1141     }
1142     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
1143     if (F) {
1144       Mat B;
1145 
1146       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1147       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
1148       ierr = MatDestroy(&B);CHKERRQ(ierr);
1149     } else {
1150       PetscScalar *marr;
1151 
1152       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1153       for (i=0;i<n_constraints;i++) {
1154         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1155         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
1156         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1157         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1158         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1159       }
1160       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
1161     }
1162     if (!pcbddc->switch_static) {
1163       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
1164       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1165       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1166       for (i=0;i<n_constraints;i++) {
1167         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
1168         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
1169         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1170         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1171         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1172         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1173       }
1174       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
1175       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
1176       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1177     } else {
1178       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
1179       pcbddc->local_auxmat2 = local_auxmat2_R;
1180       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
1181     }
1182     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1183     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
1184     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
1185     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
1186     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
1187     if (isCHOL) {
1188       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
1189     } else {
1190       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
1191     }
1192     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
1193     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
1194     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
1195     ierr = MatDestroy(&M2);CHKERRQ(ierr);
1196     ierr = MatDestroy(&M3);CHKERRQ(ierr);
1197     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
1198     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
1199     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
1200     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
1201     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1202   }
1203   /* Get submatrices from subdomain matrix */
1204   if (pcbddc->benign_n) {
1205     IS        dummy;
1206     Mat       B0_R;
1207     PetscReal norm;
1208 
1209     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1210     ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
1211     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
1212     if (norm > PETSC_SMALL) {
1213       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
1214     }
1215     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1216     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1217   }
1218 
1219   if (n_vertices) {
1220     IS is_aux;
1221 
1222     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
1223       IS tis;
1224 
1225       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
1226       ierr = ISSort(tis);CHKERRQ(ierr);
1227       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
1228       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1229     } else {
1230       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
1231     }
1232     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
1233     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
1234     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
1235     if (pcbddc->benign_n) {
1236       IS dummy;
1237 
1238       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
1239       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1240       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1241     }
1242     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1243   }
1244 
1245   /* Matrix of coarse basis functions (local) */
1246   if (pcbddc->coarse_phi_B) {
1247     PetscInt on_B,on_primal,on_D=n_D;
1248     if (pcbddc->coarse_phi_D) {
1249       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1250     }
1251     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1252     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1253       PetscScalar *marray;
1254 
1255       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1256       ierr = PetscFree(marray);CHKERRQ(ierr);
1257       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1258       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1259       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1260       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1261     }
1262   }
1263 
1264   if (!pcbddc->coarse_phi_B) {
1265     PetscScalar *marray;
1266 
1267     n = n_B*pcbddc->local_primal_size;
1268     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1269       n += n_D*pcbddc->local_primal_size;
1270     }
1271     if (!pcbddc->symmetric_primal) {
1272       n *= 2;
1273     }
1274     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1275     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1276     n = n_B*pcbddc->local_primal_size;
1277     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1278       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1279       n += n_D*pcbddc->local_primal_size;
1280     }
1281     if (!pcbddc->symmetric_primal) {
1282       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1283       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1284         n = n_B*pcbddc->local_primal_size;
1285         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1286       }
1287     } else {
1288       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1289       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1290       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1291         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1292         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1293       }
1294     }
1295   }
1296 
1297   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1298   p0_lidx_I = NULL;
1299   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1300     const PetscInt *idxs;
1301 
1302     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1303     ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr);
1304     for (i=0;i<pcbddc->benign_n;i++) {
1305       ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr);
1306     }
1307     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1308   }
1309 
1310   /* vertices */
1311   if (n_vertices) {
1312 
1313     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1314 
1315     if (n_R) {
1316       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1317       PetscBLASInt B_N,B_one = 1;
1318       PetscScalar  *x,*y;
1319       PetscBool    isseqaij;
1320 
1321       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1322       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1323       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1324       if (F) { /* TODO could be optimized for symmetric problems */
1325         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1326       } else {
1327         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1328         for (i=0;i<n_vertices;i++) {
1329           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
1330           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1331           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1332           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1333           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1334         }
1335         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1336       }
1337       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1338       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
1339       if (n_constraints) {
1340         Mat B;
1341 
1342         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1343         for (i=0;i<n_vertices;i++) {
1344           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1345           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
1346           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1347           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1348           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1349           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1350         }
1351         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1352         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1353         ierr = MatDestroy(&B);CHKERRQ(ierr);
1354         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1355         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1356         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1357         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
1358         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
1359         ierr = MatDestroy(&B);CHKERRQ(ierr);
1360       }
1361       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1362       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1363         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1364       }
1365       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1366       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1367       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1368       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1369       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1370       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1371       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1372       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1373       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1374       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1375     } else {
1376       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1377     }
1378     for (i=0;i<pcbddc->benign_n;i++) {
1379       const PetscScalar *vals;
1380       const PetscInt    *idxs;
1381       PetscInt          n,j,primal_idx;
1382 
1383       ierr = MatGetRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1384       primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + i;
1385       for (j=0;j<n;j++) {
1386         coarse_submat_vals[primal_idx*pcbddc->local_primal_size+idxs[j]] = vals[j];
1387         coarse_submat_vals[idxs[j]*pcbddc->local_primal_size+primal_idx] = vals[j];
1388       }
1389       ierr = MatRestoreRow(B0_V,i,&n,&idxs,&vals);CHKERRQ(ierr);
1390     }
1391     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1392 
1393     /* coarse basis functions */
1394     for (i=0;i<n_vertices;i++) {
1395       PetscScalar *y;
1396 
1397       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1398       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1399       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1400       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1401       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1402       y[n_B*i+idx_V_B[i]] = 1.0;
1403       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1404       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1405 
1406       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1407         PetscInt j;
1408 
1409         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1410         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1411         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1412         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1413         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1414         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1415         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1416       }
1417       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1418     }
1419     /* if n_R == 0 the object is not destroyed */
1420     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1421   }
1422 
1423   if (n_constraints) {
1424     Mat B;
1425 
1426     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1427     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1428     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1429     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1430     if (n_vertices) {
1431       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1432         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1433       } else {
1434         Mat S_VCt;
1435 
1436         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1437         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1438         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1439       }
1440     }
1441     ierr = MatDestroy(&B);CHKERRQ(ierr);
1442     /* coarse basis functions */
1443     for (i=0;i<n_constraints;i++) {
1444       PetscScalar *y;
1445 
1446       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1447       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1448       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1449       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1450       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1451       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1452       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1453       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1454         PetscInt j;
1455 
1456         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1457         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1458         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1459         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1460         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1461         for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
1462         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1463       }
1464       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1465     }
1466   }
1467   if (n_constraints) {
1468     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1469   }
1470   ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr);
1471   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1472 
1473   /* compute other basis functions for non-symmetric problems */
1474   if (!pcbddc->symmetric_primal) {
1475 
1476     if (n_constraints) {
1477       Mat S_CCT,B_C;
1478 
1479       /* this is a lazy thing */
1480       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1481       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1482       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1483       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1484       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1485       if (n_vertices) {
1486         Mat B_V,S_VCT;
1487 
1488         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1489         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1490         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1491         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1492         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1493       }
1494       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1495     } else { /* if there are no constraints, reset work */
1496       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1497     }
1498     if (n_vertices && n_R) {
1499       Mat          A_VRT;
1500       PetscScalar  *marray;
1501       PetscBLASInt B_N,B_one = 1;
1502 
1503       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1504       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1505       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1506       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1507       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1508       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1509       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1510     }
1511 
1512     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1513       for (i=0;i<pcbddc->local_primal_size;i++) {
1514         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1515         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1516         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1517         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1518         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1519       }
1520     } else {
1521       for (i=0;i<pcbddc->local_primal_size;i++) {
1522         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1523         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1524         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1525         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1526         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1527       }
1528     }
1529     /* coarse basis functions */
1530     for (i=0;i<pcbddc->local_primal_size;i++) {
1531       PetscScalar *y;
1532 
1533       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1534       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1535       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1536       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538       if (i<n_vertices) {
1539         y[n_B*i+idx_V_B[i]] = 1.0;
1540       }
1541       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1542       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1543 
1544       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1545         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1546         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1547         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1549         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1550         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1551       }
1552       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1553     }
1554   }
1555   /* free memory */
1556   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1557   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1558   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1559   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1560   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1561   ierr = PetscFree(work);CHKERRQ(ierr);
1562   if (n_vertices) {
1563     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1564   }
1565   if (n_constraints) {
1566     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1567   }
1568   /* Checking coarse_sub_mat and coarse basis functios */
1569   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1570   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1571   if (pcbddc->dbg_flag) {
1572     Mat         coarse_sub_mat;
1573     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1574     Mat         coarse_phi_D,coarse_phi_B;
1575     Mat         coarse_psi_D,coarse_psi_B;
1576     Mat         A_II,A_BB,A_IB,A_BI;
1577     Mat         C_B,CPHI;
1578     IS          is_dummy;
1579     Vec         mones;
1580     MatType     checkmattype=MATSEQAIJ;
1581     PetscReal   real_value;
1582 
1583     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1584     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1585     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1586     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1587     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1588     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1589     if (unsymmetric_check) {
1590       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1591       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1592     }
1593     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1594 
1595     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1596     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1597     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1598     if (unsymmetric_check) {
1599       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1600       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1601       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1602       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1603       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1604       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1605       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1606       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1607       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1608       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1609       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1610       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1611     } else {
1612       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1613       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1614       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1615       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1616       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1617       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1618       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1619       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1620     }
1621     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1622     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1623     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1624     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1625     if (pcbddc->benign_n) {
1626       Mat         B0_I,B0_B,B0_BPHI,B0_IPHI;
1627       PetscScalar *data,*data2;
1628       PetscInt    j;
1629 
1630       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr);
1631       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1632       ierr = MatGetSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1633       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1634       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1635       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1636       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1637       for (j=0;j<pcbddc->benign_n;j++) {
1638         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
1639         for (i=0;i<pcbddc->local_primal_size;i++) {
1640           data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
1641           data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
1642         }
1643       }
1644       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1645       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1646       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1647       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1648       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1649       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1650       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1651       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1652       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1653     }
1654 #if 0
1655   {
1656     PetscViewer viewer;
1657     char filename[256];
1658     sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank);
1659     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1660     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1661     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1662     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1663   }
1664 #endif
1665     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1666     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1667     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1668     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1669 
1670     /* check constraints */
1671     if (!pcbddc->benign_n) { /* TODO: add benign case */
1672       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1673       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1674       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1675       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1676       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1677       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1678       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1679       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1680       if (unsymmetric_check) {
1681         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1682         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1683         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1684         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1685         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1686       }
1687       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1688       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1689       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1690       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1691     }
1692     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1693     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1694     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1695     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1696     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1697     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1698     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1699     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1700     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1701     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1702     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1703     if (unsymmetric_check) {
1704       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1705       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1706     }
1707     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1708   }
1709   /* get back data */
1710   *coarse_submat_vals_n = coarse_submat_vals;
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1716 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1717 {
1718   Mat            *work_mat;
1719   IS             isrow_s,iscol_s;
1720   PetscBool      rsorted,csorted;
1721   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1726   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1727   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1728   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1729 
1730   if (!rsorted) {
1731     const PetscInt *idxs;
1732     PetscInt *idxs_sorted,i;
1733 
1734     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1735     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1736     for (i=0;i<rsize;i++) {
1737       idxs_perm_r[i] = i;
1738     }
1739     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1740     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1741     for (i=0;i<rsize;i++) {
1742       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1743     }
1744     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1745     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1746   } else {
1747     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1748     isrow_s = isrow;
1749   }
1750 
1751   if (!csorted) {
1752     if (isrow == iscol) {
1753       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1754       iscol_s = isrow_s;
1755     } else {
1756       const PetscInt *idxs;
1757       PetscInt *idxs_sorted,i;
1758 
1759       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1760       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1761       for (i=0;i<csize;i++) {
1762         idxs_perm_c[i] = i;
1763       }
1764       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1765       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1766       for (i=0;i<csize;i++) {
1767         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1768       }
1769       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1770       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1771     }
1772   } else {
1773     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1774     iscol_s = iscol;
1775   }
1776 
1777   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1778 
1779   if (!rsorted || !csorted) {
1780     Mat      new_mat;
1781     IS       is_perm_r,is_perm_c;
1782 
1783     if (!rsorted) {
1784       PetscInt *idxs_r,i;
1785       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1786       for (i=0;i<rsize;i++) {
1787         idxs_r[idxs_perm_r[i]] = i;
1788       }
1789       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1790       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1791     } else {
1792       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1793     }
1794     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1795 
1796     if (!csorted) {
1797       if (isrow_s == iscol_s) {
1798         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1799         is_perm_c = is_perm_r;
1800       } else {
1801         PetscInt *idxs_c,i;
1802         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1803         for (i=0;i<csize;i++) {
1804           idxs_c[idxs_perm_c[i]] = i;
1805         }
1806         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1807         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1808       }
1809     } else {
1810       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1811     }
1812     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1813 
1814     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1815     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1816     work_mat[0] = new_mat;
1817     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1818     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1819   }
1820 
1821   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1822   *B = work_mat[0];
1823   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1824   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1825   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1831 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1832 {
1833   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1834   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1835   Mat            new_mat;
1836   IS             is_local,is_global;
1837   PetscInt       local_size;
1838   PetscBool      isseqaij;
1839   PetscErrorCode ierr;
1840 
1841   PetscFunctionBegin;
1842   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1843   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1844   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1845   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1846   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1847   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1848   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1849 
1850   /* check */
1851   if (pcbddc->dbg_flag) {
1852     Vec       x,x_change;
1853     PetscReal error;
1854 
1855     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1856     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1857     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1858     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1859     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1860     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1861     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1862     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1863     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1864     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1865     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1866     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1867     ierr = VecDestroy(&x);CHKERRQ(ierr);
1868     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1869   }
1870 
1871   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1872   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1873   if (isseqaij) {
1874     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1875   } else {
1876     Mat work_mat;
1877     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1878     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1879     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1880   }
1881   if (matis->A->symmetric_set) {
1882     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1883 #if !defined(PETSC_USE_COMPLEX)
1884     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1885 #endif
1886   }
1887   /*
1888   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1889   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1890   */
1891   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 #undef __FUNCT__
1896 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1897 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1898 {
1899   PC_IS*          pcis = (PC_IS*)(pc->data);
1900   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1901   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1902   PetscInt        *idx_R_local=NULL;
1903   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1904   PetscInt        vbs,bs;
1905   PetscBT         bitmask=NULL;
1906   PetscErrorCode  ierr;
1907 
1908   PetscFunctionBegin;
1909   /*
1910     No need to setup local scatters if
1911       - primal space is unchanged
1912         AND
1913       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1914         AND
1915       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1916   */
1917   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1918     PetscFunctionReturn(0);
1919   }
1920   /* destroy old objects */
1921   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1922   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1923   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1924   /* Set Non-overlapping dimensions */
1925   n_B = pcis->n_B;
1926   n_D = pcis->n - n_B;
1927   n_vertices = pcbddc->n_vertices;
1928 
1929   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1930 
1931   /* create auxiliary bitmask and allocate workspace */
1932   if (!sub_schurs->reuse_mumps) {
1933     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1934     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1935     for (i=0;i<n_vertices;i++) {
1936       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1937     }
1938 
1939     for (i=0, n_R=0; i<pcis->n; i++) {
1940       if (!PetscBTLookup(bitmask,i)) {
1941         idx_R_local[n_R++] = i;
1942       }
1943     }
1944   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1945     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1946 
1947     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1948     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1949   }
1950 
1951   /* Block code */
1952   vbs = 1;
1953   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1954   if (bs>1 && !(n_vertices%bs)) {
1955     PetscBool is_blocked = PETSC_TRUE;
1956     PetscInt  *vary;
1957     if (!sub_schurs->reuse_mumps) {
1958       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1959       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1960       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1961       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1962       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1963       for (i=0; i<pcis->n/bs; i++) {
1964         if (vary[i]!=0 && vary[i]!=bs) {
1965           is_blocked = PETSC_FALSE;
1966           break;
1967         }
1968       }
1969       ierr = PetscFree(vary);CHKERRQ(ierr);
1970     } else {
1971       /* Verify directly the R set */
1972       for (i=0; i<n_R/bs; i++) {
1973         PetscInt j,node=idx_R_local[bs*i];
1974         for (j=1; j<bs; j++) {
1975           if (node != idx_R_local[bs*i+j]-j) {
1976             is_blocked = PETSC_FALSE;
1977             break;
1978           }
1979         }
1980       }
1981     }
1982     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1983       vbs = bs;
1984       for (i=0;i<n_R/vbs;i++) {
1985         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1986       }
1987     }
1988   }
1989   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1990   if (sub_schurs->reuse_mumps) {
1991     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1992 
1993     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1994     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1995     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1996     reuse_mumps->is_R = pcbddc->is_R_local;
1997   } else {
1998     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1999   }
2000 
2001   /* print some info if requested */
2002   if (pcbddc->dbg_flag) {
2003     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2004     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2005     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2006     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
2007     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
2008     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr);
2009     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2010   }
2011 
2012   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
2013   if (!sub_schurs->reuse_mumps) {
2014     IS       is_aux1,is_aux2;
2015     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
2016 
2017     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2018     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
2019     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
2020     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2021     for (i=0; i<n_D; i++) {
2022       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
2023     }
2024     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2025     for (i=0, j=0; i<n_R; i++) {
2026       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
2027         aux_array1[j++] = i;
2028       }
2029     }
2030     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2031     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2032     for (i=0, j=0; i<n_B; i++) {
2033       if (!PetscBTLookup(bitmask,is_indices[i])) {
2034         aux_array2[j++] = i;
2035       }
2036     }
2037     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2038     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
2039     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
2040     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2041     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
2042 
2043     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2044       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
2045       for (i=0, j=0; i<n_R; i++) {
2046         if (PetscBTLookup(bitmask,idx_R_local[i])) {
2047           aux_array1[j++] = i;
2048         }
2049       }
2050       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
2051       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2052       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
2053     }
2054     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
2055     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
2056   } else {
2057     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2058     IS               tis;
2059     PetscInt         schur_size;
2060 
2061     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
2062     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
2063     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
2064     ierr = ISDestroy(&tis);CHKERRQ(ierr);
2065     if (pcbddc->switch_static || pcbddc->dbg_flag) {
2066       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
2067       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
2068       ierr = ISDestroy(&tis);CHKERRQ(ierr);
2069     }
2070   }
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
2077 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
2078 {
2079   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
2080   PC_IS          *pcis = (PC_IS*)pc->data;
2081   PC             pc_temp;
2082   Mat            A_RR;
2083   MatReuse       reuse;
2084   PetscScalar    m_one = -1.0;
2085   PetscReal      value;
2086   PetscInt       n_D,n_R;
2087   PetscBool      use_exact,use_exact_reduced,issbaij;
2088   PetscErrorCode ierr;
2089   /* prefixes stuff */
2090   char           dir_prefix[256],neu_prefix[256],str_level[16];
2091   size_t         len;
2092 
2093   PetscFunctionBegin;
2094 
2095   /* compute prefixes */
2096   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
2097   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
2098   if (!pcbddc->current_level) {
2099     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2100     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2101     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2102     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2103   } else {
2104     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2105     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
2106     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2107     len -= 15; /* remove "pc_bddc_coarse_" */
2108     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
2109     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
2110     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2111     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
2112     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
2113     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
2114     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
2115     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
2116   }
2117 
2118   /* DIRICHLET PROBLEM */
2119   if (dirichlet) {
2120     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2121     if (pcbddc->local_mat->symmetric_set) {
2122       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2123     }
2124     /* Matrix for Dirichlet problem is pcis->A_II */
2125     n_D = pcis->n - pcis->n_B;
2126     if (!pcbddc->ksp_D) { /* create object if not yet build */
2127       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
2128       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
2129       /* default */
2130       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
2131       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
2132       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2133       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2134       if (issbaij) {
2135         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2136       } else {
2137         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2138       }
2139       /* Allow user's customization */
2140       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
2141       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2142     }
2143     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
2144     if (sub_schurs->reuse_mumps) {
2145       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2146 
2147       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
2148     }
2149     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2150     if (!n_D) {
2151       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
2152       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2153     }
2154     /* Set Up KSP for Dirichlet problem of BDDC */
2155     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
2156     /* set ksp_D into pcis data */
2157     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
2158     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
2159     pcis->ksp_D = pcbddc->ksp_D;
2160   }
2161 
2162   /* NEUMANN PROBLEM */
2163   A_RR = 0;
2164   if (neumann) {
2165     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2166     PetscInt        ibs,mbs;
2167     PetscBool       issbaij;
2168     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
2169     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
2170     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
2171     if (pcbddc->ksp_R) { /* already created ksp */
2172       PetscInt nn_R;
2173       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
2174       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2175       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
2176       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
2177         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
2178         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2179         reuse = MAT_INITIAL_MATRIX;
2180       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
2181         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
2182           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2183           reuse = MAT_INITIAL_MATRIX;
2184         } else { /* safe to reuse the matrix */
2185           reuse = MAT_REUSE_MATRIX;
2186         }
2187       }
2188       /* last check */
2189       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
2190         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2191         reuse = MAT_INITIAL_MATRIX;
2192       }
2193     } else { /* first time, so we need to create the matrix */
2194       reuse = MAT_INITIAL_MATRIX;
2195     }
2196     /* extract A_RR */
2197     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
2198     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
2199     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2200     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
2201       if (matis->A == pcbddc->local_mat) {
2202         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2203         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2204       } else {
2205         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2206       }
2207     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
2208       if (matis->A == pcbddc->local_mat) {
2209         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2210         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2211       } else {
2212         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2213       }
2214     }
2215     if (!sub_schurs->reuse_mumps) {
2216       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
2217       if (pcbddc->local_mat->symmetric_set) {
2218         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2219       }
2220     } else {
2221       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2222 
2223       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2224       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
2225       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2226     }
2227     if (!pcbddc->ksp_R) { /* create object if not present */
2228       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
2229       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
2230       /* default */
2231       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
2232       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
2233       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2234       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2235       if (issbaij) {
2236         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2237       } else {
2238         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2239       }
2240       /* Allow user's customization */
2241       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
2242       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2243     }
2244     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2245     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2246     if (!n_R) {
2247       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2248       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2249     }
2250     /* Reuse MUMPS solver if it is present */
2251     if (sub_schurs->reuse_mumps) {
2252       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2253 
2254       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2255     }
2256     /* Set Up KSP for Neumann problem of BDDC */
2257     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2258   }
2259   /* free Neumann problem's matrix */
2260   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2261 
2262   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2263   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2264     if (pcbddc->dbg_flag) {
2265       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2266       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2267       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2268     }
2269     if (dirichlet) { /* Dirichlet */
2270       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2271       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2272       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2273       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2274       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2275       /* need to be adapted? */
2276       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2277       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2278       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2279       /* print info */
2280       if (pcbddc->dbg_flag) {
2281         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2282         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2283       }
2284       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2285         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2286       }
2287     }
2288     if (neumann) { /* Neumann */
2289       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2290       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2291       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2292       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2293       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2294       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2295       /* need to be adapted? */
2296       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2297       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2298       /* print info */
2299       if (pcbddc->dbg_flag) {
2300         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2301         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2302       }
2303       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2304         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2305       }
2306     }
2307   }
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 #undef __FUNCT__
2312 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2313 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2314 {
2315   PetscErrorCode  ierr;
2316   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2317   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2318 
2319   PetscFunctionBegin;
2320   if (!sub_schurs->reuse_mumps) {
2321     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2322   }
2323   if (!pcbddc->switch_static) {
2324     if (applytranspose && pcbddc->local_auxmat1) {
2325       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2326       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2327     }
2328     if (!sub_schurs->reuse_mumps) {
2329       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2330       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2331     } else {
2332       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2333 
2334       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2335       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2336     }
2337   } else {
2338     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2339     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2340     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2341     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2342     if (applytranspose && pcbddc->local_auxmat1) {
2343       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2344       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2345       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2346       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2347     }
2348   }
2349   if (!sub_schurs->reuse_mumps) {
2350     if (applytranspose) {
2351       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2352     } else {
2353       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2354     }
2355 #if defined(PETSC_HAVE_MUMPS)
2356   } else {
2357     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2358 
2359     if (applytranspose) {
2360       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2361     } else {
2362       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2363     }
2364 #endif
2365   }
2366   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2367   if (!pcbddc->switch_static) {
2368     if (!sub_schurs->reuse_mumps) {
2369       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2370       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2371     } else {
2372       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2373 
2374       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2375       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2376     }
2377     if (!applytranspose && pcbddc->local_auxmat1) {
2378       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2379       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2380     }
2381   } else {
2382     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2383     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2384     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2385     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2386     if (!applytranspose && pcbddc->local_auxmat1) {
2387       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2388       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2389     }
2390     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2391     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2392     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2393     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2394   }
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2399 #undef __FUNCT__
2400 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2401 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2402 {
2403   PetscErrorCode ierr;
2404   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2405   PC_IS*            pcis = (PC_IS*)  (pc->data);
2406   const PetscScalar zero = 0.0;
2407 
2408   PetscFunctionBegin;
2409   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2410   if (applytranspose) {
2411     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2412     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2413   } else {
2414     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2415     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2416   }
2417 
2418   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2419   if (pcbddc->benign_n) {
2420     PetscScalar *array;
2421     PetscInt    j;
2422 
2423     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2424     for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
2425     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2426   }
2427 
2428   /* start communications from local primal nodes to rhs of coarse solver */
2429   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2430   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2431   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2432 
2433   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2434   /* TODO remove null space when doing multilevel */
2435   if (pcbddc->coarse_ksp) {
2436     Mat coarse_mat;
2437     Vec rhs,sol;
2438     MatNullSpace nullsp;
2439 
2440     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2441     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2442     ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
2443     ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr);
2444     if (nullsp) {
2445       ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr);
2446     }
2447     if (applytranspose) {
2448       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2449     } else {
2450       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2451     }
2452     if (nullsp) {
2453       ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr);
2454     }
2455   }
2456 
2457   /* Local solution on R nodes */
2458   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2459     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2460   }
2461 
2462   /* communications from coarse sol to local primal nodes */
2463   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2464   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2465 
2466   /* Sum contributions from two levels */
2467   if (applytranspose) {
2468     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2469     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2470   } else {
2471     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2472     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2473   }
2474   /* store p0 */
2475   if (pcbddc->benign_n) {
2476     PetscScalar *array;
2477     PetscInt    j;
2478 
2479     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2480     for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
2481     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2482   }
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2488 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2489 {
2490   PetscErrorCode ierr;
2491   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2492   PetscScalar    *array;
2493   Vec            from,to;
2494 
2495   PetscFunctionBegin;
2496   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2497     from = pcbddc->coarse_vec;
2498     to = pcbddc->vec1_P;
2499     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2500       Vec tvec;
2501 
2502       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2503       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2504       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2505       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2506       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2507       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2508     }
2509   } else { /* from local to global -> put data in coarse right hand side */
2510     from = pcbddc->vec1_P;
2511     to = pcbddc->coarse_vec;
2512   }
2513   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 #undef __FUNCT__
2518 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2519 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2520 {
2521   PetscErrorCode ierr;
2522   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2523   PetscScalar    *array;
2524   Vec            from,to;
2525 
2526   PetscFunctionBegin;
2527   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2528     from = pcbddc->coarse_vec;
2529     to = pcbddc->vec1_P;
2530   } else { /* from local to global -> put data in coarse right hand side */
2531     from = pcbddc->vec1_P;
2532     to = pcbddc->coarse_vec;
2533   }
2534   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2535   if (smode == SCATTER_FORWARD) {
2536     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2537       Vec tvec;
2538 
2539       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2540       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2541       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2542       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2543     }
2544   } else {
2545     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2546      ierr = VecResetArray(from);CHKERRQ(ierr);
2547     }
2548   }
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 /* uncomment for testing purposes */
2553 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2554 #undef __FUNCT__
2555 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2556 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2557 {
2558   PetscErrorCode    ierr;
2559   PC_IS*            pcis = (PC_IS*)(pc->data);
2560   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2561   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2562   /* one and zero */
2563   PetscScalar       one=1.0,zero=0.0;
2564   /* space to store constraints and their local indices */
2565   PetscScalar       *constraints_data;
2566   PetscInt          *constraints_idxs,*constraints_idxs_B;
2567   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2568   PetscInt          *constraints_n;
2569   /* iterators */
2570   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2571   /* BLAS integers */
2572   PetscBLASInt      lwork,lierr;
2573   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2574   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2575   /* reuse */
2576   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2577   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2578   /* change of basis */
2579   PetscBool         qr_needed;
2580   PetscBT           change_basis,qr_needed_idx;
2581   /* auxiliary stuff */
2582   PetscInt          *nnz,*is_indices;
2583   PetscInt          ncc;
2584   /* some quantities */
2585   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2586   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2587 
2588   PetscFunctionBegin;
2589   /* Destroy Mat objects computed previously */
2590   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2591   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2592   /* save info on constraints from previous setup (if any) */
2593   olocal_primal_size = pcbddc->local_primal_size;
2594   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2595   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2596   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2597   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2598   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2599   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2600 
2601   /* print some info */
2602   if (pcbddc->dbg_flag) {
2603     IS       vertices;
2604     PetscInt nv,nedges,nfaces;
2605     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2606     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2607     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2608     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2609     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2610     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2612     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2613     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2614   }
2615 
2616   if (!pcbddc->adaptive_selection) {
2617     IS           ISForVertices,*ISForFaces,*ISForEdges;
2618     MatNullSpace nearnullsp;
2619     const Vec    *nearnullvecs;
2620     Vec          *localnearnullsp;
2621     PetscScalar  *array;
2622     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2623     PetscBool    nnsp_has_cnst;
2624     /* LAPACK working arrays for SVD or POD */
2625     PetscBool    skip_lapack,boolforchange;
2626     PetscScalar  *work;
2627     PetscReal    *singular_vals;
2628 #if defined(PETSC_USE_COMPLEX)
2629     PetscReal    *rwork;
2630 #endif
2631 #if defined(PETSC_MISSING_LAPACK_GESVD)
2632     PetscScalar  *temp_basis,*correlation_mat;
2633 #else
2634     PetscBLASInt dummy_int=1;
2635     PetscScalar  dummy_scalar=1.;
2636 #endif
2637 
2638     /* Get index sets for faces, edges and vertices from graph */
2639     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2640     /* free unneeded index sets */
2641     if (!pcbddc->use_vertices) {
2642       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2643     }
2644     if (!pcbddc->use_edges) {
2645       for (i=0;i<n_ISForEdges;i++) {
2646         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2647       }
2648       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2649       n_ISForEdges = 0;
2650     }
2651     if (!pcbddc->use_faces) {
2652       for (i=0;i<n_ISForFaces;i++) {
2653         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2654       }
2655       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2656       n_ISForFaces = 0;
2657     }
2658 
2659 #if defined(PETSC_USE_DEBUG)
2660     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2661        Also use_change_of_basis should be consistent among processors */
2662     if (pcbddc->NullSpace) {
2663       PetscBool tbool[2],gbool[2];
2664 
2665       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2666         pcbddc->use_change_of_basis = PETSC_TRUE;
2667         if (!ISForEdges) {
2668           pcbddc->use_change_on_faces = PETSC_TRUE;
2669         }
2670       }
2671       tbool[0] = pcbddc->use_change_of_basis;
2672       tbool[1] = pcbddc->use_change_on_faces;
2673       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2674       pcbddc->use_change_of_basis = gbool[0];
2675       pcbddc->use_change_on_faces = gbool[1];
2676     }
2677 #endif
2678 
2679     /* check if near null space is attached to global mat */
2680     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2681     if (nearnullsp) {
2682       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2683       /* remove any stored info */
2684       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2685       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2686       /* store information for BDDC solver reuse */
2687       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2688       pcbddc->onearnullspace = nearnullsp;
2689       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2690       for (i=0;i<nnsp_size;i++) {
2691         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2692       }
2693     } else { /* if near null space is not provided BDDC uses constants by default */
2694       nnsp_size = 0;
2695       nnsp_has_cnst = PETSC_TRUE;
2696     }
2697     /* get max number of constraints on a single cc */
2698     max_constraints = nnsp_size;
2699     if (nnsp_has_cnst) max_constraints++;
2700 
2701     /*
2702          Evaluate maximum storage size needed by the procedure
2703          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2704          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2705          There can be multiple constraints per connected component
2706                                                                                                                                                            */
2707     n_vertices = 0;
2708     if (ISForVertices) {
2709       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2710     }
2711     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2712     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2713 
2714     total_counts = n_ISForFaces+n_ISForEdges;
2715     total_counts *= max_constraints;
2716     total_counts += n_vertices;
2717     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2718 
2719     total_counts = 0;
2720     max_size_of_constraint = 0;
2721     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2722       IS used_is;
2723       if (i<n_ISForEdges) {
2724         used_is = ISForEdges[i];
2725       } else {
2726         used_is = ISForFaces[i-n_ISForEdges];
2727       }
2728       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2729       total_counts += j;
2730       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2731     }
2732     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2733 
2734     /* get local part of global near null space vectors */
2735     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2736     for (k=0;k<nnsp_size;k++) {
2737       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2738       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2739       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2740     }
2741 
2742     /* whether or not to skip lapack calls */
2743     skip_lapack = PETSC_TRUE;
2744     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2745 
2746     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2747     if (!skip_lapack) {
2748       PetscScalar temp_work;
2749 
2750 #if defined(PETSC_MISSING_LAPACK_GESVD)
2751       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2752       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2753       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2754       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2755 #if defined(PETSC_USE_COMPLEX)
2756       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2757 #endif
2758       /* now we evaluate the optimal workspace using query with lwork=-1 */
2759       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2760       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2761       lwork = -1;
2762       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2763 #if !defined(PETSC_USE_COMPLEX)
2764       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2765 #else
2766       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2767 #endif
2768       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2769       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2770 #else /* on missing GESVD */
2771       /* SVD */
2772       PetscInt max_n,min_n;
2773       max_n = max_size_of_constraint;
2774       min_n = max_constraints;
2775       if (max_size_of_constraint < max_constraints) {
2776         min_n = max_size_of_constraint;
2777         max_n = max_constraints;
2778       }
2779       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2780 #if defined(PETSC_USE_COMPLEX)
2781       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2782 #endif
2783       /* now we evaluate the optimal workspace using query with lwork=-1 */
2784       lwork = -1;
2785       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2786       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2787       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2788       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2789 #if !defined(PETSC_USE_COMPLEX)
2790       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2791 #else
2792       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2793 #endif
2794       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2795       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2796 #endif /* on missing GESVD */
2797       /* Allocate optimal workspace */
2798       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2799       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2800     }
2801     /* Now we can loop on constraining sets */
2802     total_counts = 0;
2803     constraints_idxs_ptr[0] = 0;
2804     constraints_data_ptr[0] = 0;
2805     /* vertices */
2806     if (n_vertices) {
2807       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2808       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2809       for (i=0;i<n_vertices;i++) {
2810         constraints_n[total_counts] = 1;
2811         constraints_data[total_counts] = 1.0;
2812         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2813         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2814         total_counts++;
2815       }
2816       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2817       n_vertices = total_counts;
2818     }
2819 
2820     /* edges and faces */
2821     total_counts_cc = total_counts;
2822     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2823       IS        used_is;
2824       PetscBool idxs_copied = PETSC_FALSE;
2825 
2826       if (ncc<n_ISForEdges) {
2827         used_is = ISForEdges[ncc];
2828         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2829       } else {
2830         used_is = ISForFaces[ncc-n_ISForEdges];
2831         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2832       }
2833       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2834 
2835       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2836       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2837       /* change of basis should not be performed on local periodic nodes */
2838       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2839       if (nnsp_has_cnst) {
2840         PetscScalar quad_value;
2841 
2842         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2843         idxs_copied = PETSC_TRUE;
2844 
2845         if (!pcbddc->use_nnsp_true) {
2846           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2847         } else {
2848           quad_value = 1.0;
2849         }
2850         for (j=0;j<size_of_constraint;j++) {
2851           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2852         }
2853         temp_constraints++;
2854         total_counts++;
2855       }
2856       for (k=0;k<nnsp_size;k++) {
2857         PetscReal real_value;
2858         PetscScalar *ptr_to_data;
2859 
2860         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2861         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2862         for (j=0;j<size_of_constraint;j++) {
2863           ptr_to_data[j] = array[is_indices[j]];
2864         }
2865         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2866         /* check if array is null on the connected component */
2867         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2868         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2869         if (real_value > 0.0) { /* keep indices and values */
2870           temp_constraints++;
2871           total_counts++;
2872           if (!idxs_copied) {
2873             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2874             idxs_copied = PETSC_TRUE;
2875           }
2876         }
2877       }
2878       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2879       valid_constraints = temp_constraints;
2880       if (!pcbddc->use_nnsp_true && temp_constraints) {
2881         if (temp_constraints == 1) { /* just normalize the constraint */
2882           PetscScalar norm,*ptr_to_data;
2883 
2884           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2885           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2886           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2887           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2888           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2889         } else { /* perform SVD */
2890           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2891           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2892 
2893 #if defined(PETSC_MISSING_LAPACK_GESVD)
2894           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2895              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2896              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2897                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2898                 from that computed using LAPACKgesvd
2899              -> This is due to a different computation of eigenvectors in LAPACKheev
2900              -> The quality of the POD-computed basis will be the same */
2901           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2902           /* Store upper triangular part of correlation matrix */
2903           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2904           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2905           for (j=0;j<temp_constraints;j++) {
2906             for (k=0;k<j+1;k++) {
2907               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2908             }
2909           }
2910           /* compute eigenvalues and eigenvectors of correlation matrix */
2911           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2912           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2913 #if !defined(PETSC_USE_COMPLEX)
2914           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2915 #else
2916           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2917 #endif
2918           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2919           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2920           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2921           j = 0;
2922           while (j < temp_constraints && singular_vals[j] < tol) j++;
2923           total_counts = total_counts-j;
2924           valid_constraints = temp_constraints-j;
2925           /* scale and copy POD basis into used quadrature memory */
2926           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2927           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2928           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2929           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2930           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2931           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2932           if (j<temp_constraints) {
2933             PetscInt ii;
2934             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2935             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2936             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2937             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2938             for (k=0;k<temp_constraints-j;k++) {
2939               for (ii=0;ii<size_of_constraint;ii++) {
2940                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2941               }
2942             }
2943           }
2944 #else  /* on missing GESVD */
2945           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2946           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2947           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2948           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2949 #if !defined(PETSC_USE_COMPLEX)
2950           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2951 #else
2952           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2953 #endif
2954           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2955           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2956           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2957           k = temp_constraints;
2958           if (k > size_of_constraint) k = size_of_constraint;
2959           j = 0;
2960           while (j < k && singular_vals[k-j-1] < tol) j++;
2961           valid_constraints = k-j;
2962           total_counts = total_counts-temp_constraints+valid_constraints;
2963 #endif /* on missing GESVD */
2964         }
2965       }
2966       /* update pointers information */
2967       if (valid_constraints) {
2968         constraints_n[total_counts_cc] = valid_constraints;
2969         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2970         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2971         /* set change_of_basis flag */
2972         if (boolforchange) {
2973           PetscBTSet(change_basis,total_counts_cc);
2974         }
2975         total_counts_cc++;
2976       }
2977     }
2978     /* free workspace */
2979     if (!skip_lapack) {
2980       ierr = PetscFree(work);CHKERRQ(ierr);
2981 #if defined(PETSC_USE_COMPLEX)
2982       ierr = PetscFree(rwork);CHKERRQ(ierr);
2983 #endif
2984       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2985 #if defined(PETSC_MISSING_LAPACK_GESVD)
2986       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2987       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2988 #endif
2989     }
2990     for (k=0;k<nnsp_size;k++) {
2991       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2992     }
2993     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2994     /* free index sets of faces, edges and vertices */
2995     for (i=0;i<n_ISForFaces;i++) {
2996       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2997     }
2998     if (n_ISForFaces) {
2999       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
3000     }
3001     for (i=0;i<n_ISForEdges;i++) {
3002       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
3003     }
3004     if (n_ISForEdges) {
3005       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
3006     }
3007     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
3008   } else {
3009     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3010 
3011     total_counts = 0;
3012     n_vertices = 0;
3013     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3014       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
3015     }
3016     max_constraints = 0;
3017     total_counts_cc = 0;
3018     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3019       total_counts += pcbddc->adaptive_constraints_n[i];
3020       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
3021       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
3022     }
3023     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
3024     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
3025     constraints_idxs = pcbddc->adaptive_constraints_idxs;
3026     constraints_data = pcbddc->adaptive_constraints_data;
3027     /* constraints_n differs from pcbddc->adaptive_constraints_n */
3028     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
3029     total_counts_cc = 0;
3030     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
3031       if (pcbddc->adaptive_constraints_n[i]) {
3032         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
3033       }
3034     }
3035 #if 0
3036     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
3037     for (i=0;i<total_counts_cc;i++) {
3038       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
3039       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
3040       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
3041         printf(" %d",constraints_idxs[j]);
3042       }
3043       printf("\n");
3044       printf("number of cc: %d\n",constraints_n[i]);
3045     }
3046     for (i=0;i<n_vertices;i++) {
3047       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
3048     }
3049     for (i=0;i<sub_schurs->n_subs;i++) {
3050       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
3051     }
3052 #endif
3053 
3054     max_size_of_constraint = 0;
3055     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
3056     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
3057     /* Change of basis */
3058     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
3059     if (pcbddc->use_change_of_basis) {
3060       for (i=0;i<sub_schurs->n_subs;i++) {
3061         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
3062           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
3063         }
3064       }
3065     }
3066   }
3067   pcbddc->local_primal_size = total_counts;
3068   ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3069 
3070   /* map constraints_idxs in boundary numbering */
3071   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
3072   if (i != constraints_idxs_ptr[total_counts_cc]) {
3073     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
3074   }
3075 
3076   /* Create constraint matrix */
3077   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
3078   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
3079   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
3080 
3081   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
3082   /* determine if a QR strategy is needed for change of basis */
3083   qr_needed = PETSC_FALSE;
3084   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
3085   total_primal_vertices=0;
3086   pcbddc->local_primal_size_cc = 0;
3087   for (i=0;i<total_counts_cc;i++) {
3088     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3089     if (size_of_constraint == 1) {
3090       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
3091       pcbddc->local_primal_size_cc += 1;
3092     } else if (PetscBTLookup(change_basis,i)) {
3093       for (k=0;k<constraints_n[i];k++) {
3094         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3095       }
3096       pcbddc->local_primal_size_cc += constraints_n[i];
3097       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
3098         PetscBTSet(qr_needed_idx,i);
3099         qr_needed = PETSC_TRUE;
3100       }
3101     } else {
3102       pcbddc->local_primal_size_cc += 1;
3103     }
3104   }
3105   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
3106   pcbddc->n_vertices = total_primal_vertices;
3107   /* permute indices in order to have a sorted set of vertices */
3108   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
3109 
3110   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
3111   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
3112   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
3113 
3114   /* nonzero structure of constraint matrix */
3115   /* and get reference dof for local constraints */
3116   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
3117   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
3118 
3119   j = total_primal_vertices;
3120   total_counts = total_primal_vertices;
3121   cum = total_primal_vertices;
3122   for (i=n_vertices;i<total_counts_cc;i++) {
3123     if (!PetscBTLookup(change_basis,i)) {
3124       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
3125       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
3126       cum++;
3127       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3128       for (k=0;k<constraints_n[i];k++) {
3129         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
3130         nnz[j+k] = size_of_constraint;
3131       }
3132       j += constraints_n[i];
3133     }
3134   }
3135   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
3136   ierr = PetscFree(nnz);CHKERRQ(ierr);
3137 
3138   /* set values in constraint matrix */
3139   for (i=0;i<total_primal_vertices;i++) {
3140     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3141   }
3142   total_counts = total_primal_vertices;
3143   for (i=n_vertices;i<total_counts_cc;i++) {
3144     if (!PetscBTLookup(change_basis,i)) {
3145       PetscInt *cols;
3146 
3147       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3148       cols = constraints_idxs+constraints_idxs_ptr[i];
3149       for (k=0;k<constraints_n[i];k++) {
3150         PetscInt    row = total_counts+k;
3151         PetscScalar *vals;
3152 
3153         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
3154         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3155       }
3156       total_counts += constraints_n[i];
3157     }
3158   }
3159   /* assembling */
3160   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162 
3163   /*
3164   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3165   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
3166   */
3167   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
3168   if (pcbddc->use_change_of_basis) {
3169     /* dual and primal dofs on a single cc */
3170     PetscInt     dual_dofs,primal_dofs;
3171     /* working stuff for GEQRF */
3172     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
3173     PetscBLASInt lqr_work;
3174     /* working stuff for UNGQR */
3175     PetscScalar  *gqr_work,lgqr_work_t;
3176     PetscBLASInt lgqr_work;
3177     /* working stuff for TRTRS */
3178     PetscScalar  *trs_rhs;
3179     PetscBLASInt Blas_NRHS;
3180     /* pointers for values insertion into change of basis matrix */
3181     PetscInt     *start_rows,*start_cols;
3182     PetscScalar  *start_vals;
3183     /* working stuff for values insertion */
3184     PetscBT      is_primal;
3185     PetscInt     *aux_primal_numbering_B;
3186     /* matrix sizes */
3187     PetscInt     global_size,local_size;
3188     /* temporary change of basis */
3189     Mat          localChangeOfBasisMatrix;
3190     /* extra space for debugging */
3191     PetscScalar  *dbg_work;
3192 
3193     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
3194     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
3195     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3196     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
3197     /* nonzeros for local mat */
3198     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
3199     for (i=0;i<pcis->n;i++) nnz[i]=1;
3200     for (i=n_vertices;i<total_counts_cc;i++) {
3201       if (PetscBTLookup(change_basis,i)) {
3202         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3203         if (PetscBTLookup(qr_needed_idx,i)) {
3204           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
3205         } else {
3206           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
3207           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
3208         }
3209       }
3210     }
3211     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
3212     ierr = PetscFree(nnz);CHKERRQ(ierr);
3213     /* Set initial identity in the matrix */
3214     for (i=0;i<pcis->n;i++) {
3215       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
3216     }
3217 
3218     if (pcbddc->dbg_flag) {
3219       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
3220       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3221     }
3222 
3223 
3224     /* Now we loop on the constraints which need a change of basis */
3225     /*
3226        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
3227        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
3228 
3229        Basic blocks of change of basis matrix T computed by
3230 
3231           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
3232 
3233             | 1        0   ...        0         s_1/S |
3234             | 0        1   ...        0         s_2/S |
3235             |              ...                        |
3236             | 0        ...            1     s_{n-1}/S |
3237             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
3238 
3239             with S = \sum_{i=1}^n s_i^2
3240             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
3241                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
3242 
3243           - QR decomposition of constraints otherwise
3244     */
3245     if (qr_needed) {
3246       /* space to store Q */
3247       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
3248       /* first we issue queries for optimal work */
3249       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3250       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
3251       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3252       lqr_work = -1;
3253       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
3254       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3255       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3256       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3257       lgqr_work = -1;
3258       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3259       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3260       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3261       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3262       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3263       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3264       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3265       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3266       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3267       /* array to store scaling factors for reflectors */
3268       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3269       /* array to store rhs and solution of triangular solver */
3270       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3271       /* allocating workspace for check */
3272       if (pcbddc->dbg_flag) {
3273         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3274       }
3275     }
3276     /* array to store whether a node is primal or not */
3277     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3278     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3279     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3280     if (i != total_primal_vertices) {
3281       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3282     }
3283     for (i=0;i<total_primal_vertices;i++) {
3284       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3285     }
3286     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3287 
3288     /* loop on constraints and see whether or not they need a change of basis and compute it */
3289     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3290       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3291       if (PetscBTLookup(change_basis,total_counts)) {
3292         /* get constraint info */
3293         primal_dofs = constraints_n[total_counts];
3294         dual_dofs = size_of_constraint-primal_dofs;
3295 
3296         if (pcbddc->dbg_flag) {
3297           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3298         }
3299 
3300         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3301 
3302           /* copy quadrature constraints for change of basis check */
3303           if (pcbddc->dbg_flag) {
3304             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3305           }
3306           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3307           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3308 
3309           /* compute QR decomposition of constraints */
3310           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3311           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3312           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3313           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3314           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3315           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3316           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3317 
3318           /* explictly compute R^-T */
3319           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3320           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3321           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3322           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3323           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3324           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3325           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3326           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3327           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3328           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3329 
3330           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3331           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3332           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3333           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3334           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3335           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3336           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3337           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3338           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3339 
3340           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3341              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3342              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3343           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3344           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3345           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3346           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3347           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3348           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3349           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3350           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3351           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3352           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3353 
3354           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3355           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3356           /* insert cols for primal dofs */
3357           for (j=0;j<primal_dofs;j++) {
3358             start_vals = &qr_basis[j*size_of_constraint];
3359             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3360             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3361           }
3362           /* insert cols for dual dofs */
3363           for (j=0,k=0;j<dual_dofs;k++) {
3364             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3365               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3366               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3367               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3368               j++;
3369             }
3370           }
3371 
3372           /* check change of basis */
3373           if (pcbddc->dbg_flag) {
3374             PetscInt   ii,jj;
3375             PetscBool valid_qr=PETSC_TRUE;
3376             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3377             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3378             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3379             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3380             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3381             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3382             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3383             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3384             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3385             for (jj=0;jj<size_of_constraint;jj++) {
3386               for (ii=0;ii<primal_dofs;ii++) {
3387                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3388                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3389               }
3390             }
3391             if (!valid_qr) {
3392               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3393               for (jj=0;jj<size_of_constraint;jj++) {
3394                 for (ii=0;ii<primal_dofs;ii++) {
3395                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3396                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3397                   }
3398                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3399                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3400                   }
3401                 }
3402               }
3403             } else {
3404               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3405             }
3406           }
3407         } else { /* simple transformation block */
3408           PetscInt    row,col;
3409           PetscScalar val,norm;
3410 
3411           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3412           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3413           for (j=0;j<size_of_constraint;j++) {
3414             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3415             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3416             if (!PetscBTLookup(is_primal,row_B)) {
3417               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3418               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3419               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3420             } else {
3421               for (k=0;k<size_of_constraint;k++) {
3422                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3423                 if (row != col) {
3424                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3425                 } else {
3426                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3427                 }
3428                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3429               }
3430             }
3431           }
3432           if (pcbddc->dbg_flag) {
3433             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3434           }
3435         }
3436       } else {
3437         if (pcbddc->dbg_flag) {
3438           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3439         }
3440       }
3441     }
3442 
3443     /* free workspace */
3444     if (qr_needed) {
3445       if (pcbddc->dbg_flag) {
3446         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3447       }
3448       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3449       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3450       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3451       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3452       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3453     }
3454     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3455     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3456     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3457 
3458     /* assembling of global change of variable */
3459     {
3460       Mat      tmat;
3461       PetscInt bs;
3462 
3463       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3464       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3465       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3466       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3467       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3468       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3469       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3470       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3471       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3472       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3473       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3474       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3475       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3476       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3477       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3478       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3479       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3480       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3481     }
3482     /* check */
3483     if (pcbddc->dbg_flag) {
3484       PetscReal error;
3485       Vec       x,x_change;
3486 
3487       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3488       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3489       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3490       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3491       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3492       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3493       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3494       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3495       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3496       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3497       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3498       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3499       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3500       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3501       ierr = VecDestroy(&x);CHKERRQ(ierr);
3502       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3503     }
3504 
3505     /* adapt sub_schurs computed (if any) */
3506     if (pcbddc->use_deluxe_scaling) {
3507       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3508       if (sub_schurs->S_Ej_all) {
3509         Mat                    S_new,tmat;
3510         ISLocalToGlobalMapping NtoSall;
3511         IS                     is_all_N,is_V,is_V_Sall;
3512         const PetscScalar      *array;
3513         const PetscInt         *idxs_V,*idxs_all;
3514         PetscInt               i,n_V;
3515 
3516         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3517         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3518         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3519         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3520         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3521         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3522         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3523         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3524         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3525         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3526         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3527         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3528         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3529         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3530         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3531         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3532         for (i=0;i<n_V;i++) {
3533           PetscScalar val;
3534           PetscInt    idx;
3535 
3536           idx = idxs_V[i];
3537           val = array[idxs_all[idxs_V[i]]];
3538           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3539         }
3540         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3541         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3542         sub_schurs->S_Ej_all = S_new;
3543         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3544         if (sub_schurs->sum_S_Ej_all) {
3545           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3546           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3547           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3548           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3549           sub_schurs->sum_S_Ej_all = S_new;
3550           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3551         }
3552         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3553         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3554         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3555         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3556         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3557       }
3558     }
3559     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3560   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3561     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3562     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3563   }
3564 
3565   /* set up change of basis context */
3566   if (pcbddc->ChangeOfBasisMatrix) {
3567     PCBDDCChange_ctx change_ctx;
3568 
3569     if (!pcbddc->new_global_mat) {
3570       PetscInt global_size,local_size;
3571 
3572       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3573       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3574       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3575       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3576       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3577       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3578       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3579       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3580       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3581     } else {
3582       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3583       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3584       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3585     }
3586     if (!pcbddc->user_ChangeOfBasisMatrix) {
3587       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3588       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3589     } else {
3590       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3591       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3592     }
3593     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3594     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3595     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3596     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3597   }
3598 
3599   /* add pressure dofs to set of primal nodes for numbering purposes */
3600   for (i=0;i<pcbddc->benign_n;i++) {
3601     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
3602     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
3603     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3604     pcbddc->local_primal_size_cc++;
3605     pcbddc->local_primal_size++;
3606   }
3607 
3608   /* check if a new primal space has been introduced (also take into account benign trick) */
3609   pcbddc->new_primal_space_local = PETSC_TRUE;
3610   if (olocal_primal_size == pcbddc->local_primal_size) {
3611     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3612     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3613     if (!pcbddc->new_primal_space_local) {
3614       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3615       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3616     }
3617   }
3618   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3619   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3620   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3621 
3622   /* flush dbg viewer */
3623   if (pcbddc->dbg_flag) {
3624     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3625   }
3626 
3627   /* free workspace */
3628   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3629   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3630   if (!pcbddc->adaptive_selection) {
3631     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3632     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3633   } else {
3634     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3635                       pcbddc->adaptive_constraints_idxs_ptr,
3636                       pcbddc->adaptive_constraints_data_ptr,
3637                       pcbddc->adaptive_constraints_idxs,
3638                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3639     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3640     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3641   }
3642   PetscFunctionReturn(0);
3643 }
3644 
3645 #undef __FUNCT__
3646 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3647 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3648 {
3649   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3650   PC_IS       *pcis = (PC_IS*)pc->data;
3651   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3652   PetscInt    ierr,i,vertex_size,N;
3653   PetscViewer viewer=pcbddc->dbg_viewer;
3654 
3655   PetscFunctionBegin;
3656   /* Reset previously computed graph */
3657   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3658   /* Init local Graph struct */
3659   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3660   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3661 
3662   /* Check validity of the csr graph passed in by the user */
3663   if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3664     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
3665   }
3666 
3667   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3668   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3669     PetscInt  *xadj,*adjncy;
3670     PetscInt  nvtxs;
3671     PetscBool flg_row=PETSC_FALSE;
3672 
3673     if (pcbddc->use_local_adj) {
3674 
3675       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3676       if (flg_row) {
3677         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3678         pcbddc->computed_rowadj = PETSC_TRUE;
3679       }
3680       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3681     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3682       IS                     is_dummy;
3683       ISLocalToGlobalMapping l2gmap_dummy;
3684       PetscInt               j,sum;
3685       PetscInt               *cxadj,*cadjncy;
3686       const PetscInt         *idxs;
3687       PCBDDCGraph            graph;
3688       PetscBT                is_on_boundary;
3689 
3690       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3691       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3692       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3693       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3694       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3695       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3696       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3697       if (flg_row) {
3698         graph->xadj = xadj;
3699         graph->adjncy = adjncy;
3700       }
3701       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3702       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3703       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3704 
3705       if (pcbddc->dbg_flag) {
3706         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3707         for (i=0;i<graph->ncc;i++) {
3708           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3709         }
3710       }
3711 
3712       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3713       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3714       for (i=0;i<pcis->n_B;i++) {
3715         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3716       }
3717       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3718 
3719       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3720       sum = 0;
3721       for (i=0;i<graph->ncc;i++) {
3722         PetscInt sizecc = 0;
3723         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3724           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3725             sizecc++;
3726           }
3727         }
3728         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3729           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3730             cxadj[graph->queue[j]] = sizecc;
3731           }
3732         }
3733         sum += sizecc*sizecc;
3734       }
3735       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3736       sum = 0;
3737       for (i=0;i<pcis->n;i++) {
3738         PetscInt temp = cxadj[i];
3739         cxadj[i] = sum;
3740         sum += temp;
3741       }
3742       cxadj[pcis->n] = sum;
3743       for (i=0;i<graph->ncc;i++) {
3744         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3745           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3746             PetscInt k,sizecc = 0;
3747             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3748               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3749                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3750                 sizecc++;
3751               }
3752             }
3753           }
3754         }
3755       }
3756       if (sum) {
3757         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3758       } else {
3759         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3760         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3761       }
3762       graph->xadj = 0;
3763       graph->adjncy = 0;
3764       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3765       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3766     }
3767   }
3768   if (pcbddc->dbg_flag) {
3769     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3770   }
3771 
3772   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3773   vertex_size = 1;
3774   if (pcbddc->user_provided_isfordofs) {
3775     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3776       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3777       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3778         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3779         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3780       }
3781       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3782       pcbddc->n_ISForDofs = 0;
3783       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3784     }
3785     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3786     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3787   } else {
3788     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3789       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3790       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3791       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3792         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3793       }
3794     }
3795   }
3796 
3797   /* Setup of Graph */
3798   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3799     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3800   }
3801   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3802     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3803   }
3804   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3805 
3806   /* attach info on disconnected subdomains if present */
3807   if (pcbddc->n_local_subs) {
3808     PetscInt *local_subs;
3809 
3810     ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr);
3811     for (i=0;i<pcbddc->n_local_subs;i++) {
3812       const PetscInt *idxs;
3813       PetscInt       nl,j;
3814 
3815       ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr);
3816       ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3817       for (j=0;j<nl;j++) {
3818         local_subs[idxs[j]] = i;
3819       }
3820       ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr);
3821     }
3822     pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs;
3823     pcbddc->mat_graph->local_subs = local_subs;
3824   }
3825 
3826   /* Graph's connected components analysis */
3827   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3828 
3829   /* print some info to stdout */
3830   if (pcbddc->dbg_flag) {
3831     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3832   }
3833 
3834   /* mark topography has done */
3835   pcbddc->recompute_topography = PETSC_FALSE;
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3840 #undef __FUNCT__
3841 #define __FUNCT__ "PCBDDCSubsetNumbering"
3842 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3843 {
3844   PetscSF        sf;
3845   PetscLayout    map;
3846   const PetscInt *idxs;
3847   PetscInt       *leaf_data,*root_data,*gidxs;
3848   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3849   PetscInt       n_n,nlocals,start,first_index;
3850   PetscMPIInt    commsize;
3851   PetscBool      first_found;
3852   PetscErrorCode ierr;
3853 
3854   PetscFunctionBegin;
3855   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3856   if (subset_mult) {
3857     PetscCheckSameComm(subset,1,subset_mult,2);
3858     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3859     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3860   }
3861   /* create workspace layout for computing global indices of subset */
3862   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3863   lbounds[0] = lbounds[1] = 0;
3864   for (i=0;i<n;i++) {
3865     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3866     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3867   }
3868   lbounds[0] = -lbounds[0];
3869   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3870   gbounds[0] = -gbounds[0];
3871   N = gbounds[1] - gbounds[0] + 1;
3872   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3873   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3874   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3875   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3876   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3877 
3878   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3879   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3880   if (subset_mult) {
3881     const PetscInt* idxs_mult;
3882 
3883     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3884     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3885     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3886   } else {
3887     for (i=0;i<n;i++) leaf_data[i] = 1;
3888   }
3889   /* local size of new subset */
3890   n_n = 0;
3891   for (i=0;i<n;i++) n_n += leaf_data[i];
3892 
3893   /* global indexes in layout */
3894   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3895   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3896   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3897   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3898   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3899   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3900 
3901   /* reduce from leaves to roots */
3902   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3903   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3904   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3905 
3906   /* count indexes in local part of layout */
3907   nlocals = 0;
3908   first_index = -1;
3909   first_found = PETSC_FALSE;
3910   for (i=0;i<Nl;i++) {
3911     if (!first_found && root_data[i]) {
3912       first_found = PETSC_TRUE;
3913       first_index = i;
3914     }
3915     nlocals += root_data[i];
3916   }
3917 
3918   /* cumulative of number of indexes and size of subset without holes */
3919 #if defined(PETSC_HAVE_MPI_EXSCAN)
3920   start = 0;
3921   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3922 #else
3923   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3924   start = start-nlocals;
3925 #endif
3926 
3927   if (N_n) { /* compute total size of new subset if requested */
3928     *N_n = start + nlocals;
3929     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3930     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3931   }
3932 
3933   /* adapt root data with cumulative */
3934   if (first_found) {
3935     PetscInt old_index;
3936 
3937     root_data[first_index] += start;
3938     old_index = first_index;
3939     for (i=first_index+1;i<Nl;i++) {
3940       if (root_data[i]) {
3941         root_data[i] += root_data[old_index];
3942         old_index = i;
3943       }
3944     }
3945   }
3946 
3947   /* from roots to leaves */
3948   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3949   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3950   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3951 
3952   /* create new IS with global indexes without holes */
3953   if (subset_mult) {
3954     const PetscInt* idxs_mult;
3955     PetscInt        cum;
3956 
3957     cum = 0;
3958     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3959     for (i=0;i<n;i++) {
3960       PetscInt j;
3961       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3962     }
3963     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3964   } else {
3965     for (i=0;i<n;i++) {
3966       gidxs[i] = leaf_data[i]-1;
3967     }
3968   }
3969   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3970   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3971   PetscFunctionReturn(0);
3972 }
3973 
3974 #undef __FUNCT__
3975 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3976 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3977 {
3978   PetscInt       i,j;
3979   PetscScalar    *alphas;
3980   PetscErrorCode ierr;
3981 
3982   PetscFunctionBegin;
3983   /* this implements stabilized Gram-Schmidt */
3984   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3985   for (i=0;i<n;i++) {
3986     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3987     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3988     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3989   }
3990   ierr = PetscFree(alphas);CHKERRQ(ierr);
3991   PetscFunctionReturn(0);
3992 }
3993 
3994 #undef __FUNCT__
3995 #define __FUNCT__ "MatISGetSubassemblingPattern"
3996 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3997 {
3998   IS             ranks_send_to;
3999   PetscInt       n_neighs,*neighs,*n_shared,**shared;
4000   PetscMPIInt    size,rank,color;
4001   PetscInt       *xadj,*adjncy;
4002   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
4003   PetscInt       i,local_size,threshold=0;
4004   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
4005   PetscSubcomm   subcomm;
4006   PetscErrorCode ierr;
4007 
4008   PetscFunctionBegin;
4009   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
4010   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
4011   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
4012 
4013   /* Get info on mapping */
4014   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
4015   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4016 
4017   /* build local CSR graph of subdomains' connectivity */
4018   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
4019   xadj[0] = 0;
4020   xadj[1] = PetscMax(n_neighs-1,0);
4021   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
4022   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
4023 
4024   if (threshold) {
4025     PetscInt xadj_count = 0;
4026     for (i=1;i<n_neighs;i++) {
4027       if (n_shared[i] > threshold) {
4028         adjncy[xadj_count] = neighs[i];
4029         adjncy_wgt[xadj_count] = n_shared[i];
4030         xadj_count++;
4031       }
4032     }
4033     xadj[1] = xadj_count;
4034   } else {
4035     if (xadj[1]) {
4036       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
4037       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
4038     }
4039   }
4040   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
4041   if (use_square) {
4042     for (i=0;i<xadj[1];i++) {
4043       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
4044     }
4045   }
4046   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4047 
4048   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
4049 
4050   /*
4051     Restrict work on active processes only.
4052   */
4053   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
4054   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
4055   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
4056   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
4057   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4058   if (color) {
4059     ierr = PetscFree(xadj);CHKERRQ(ierr);
4060     ierr = PetscFree(adjncy);CHKERRQ(ierr);
4061     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4062   } else {
4063     Mat             subdomain_adj;
4064     IS              new_ranks,new_ranks_contig;
4065     MatPartitioning partitioner;
4066     PetscInt        prank,rstart=0,rend=0;
4067     PetscInt        *is_indices,*oldranks;
4068     PetscBool       aggregate;
4069 
4070     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
4071     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
4072     prank = rank;
4073     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
4074     /*
4075     for (i=0;i<size;i++) {
4076       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
4077     }
4078     */
4079     for (i=0;i<xadj[1];i++) {
4080       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
4081     }
4082     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
4083     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
4084     if (aggregate) {
4085       PetscInt    lrows,row,ncols,*cols;
4086       PetscMPIInt nrank;
4087       PetscScalar *vals;
4088 
4089       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
4090       lrows = 0;
4091       if (nrank<redprocs) {
4092         lrows = size/redprocs;
4093         if (nrank<size%redprocs) lrows++;
4094       }
4095       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
4096       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
4097       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4098       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4099       row = nrank;
4100       ncols = xadj[1]-xadj[0];
4101       cols = adjncy;
4102       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
4103       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
4104       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4105       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4106       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4107       ierr = PetscFree(xadj);CHKERRQ(ierr);
4108       ierr = PetscFree(adjncy);CHKERRQ(ierr);
4109       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
4110       ierr = PetscFree(vals);CHKERRQ(ierr);
4111     } else {
4112       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
4113     }
4114     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
4115 
4116     /* Partition */
4117     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
4118     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
4119     if (use_vwgt) {
4120       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
4121       v_wgt[0] = local_size;
4122       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
4123     }
4124     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
4125     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
4126     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
4127     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
4128     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
4129 
4130     /* renumber new_ranks to avoid "holes" in new set of processors */
4131     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
4132     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
4133     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4134     if (!redprocs) {
4135       ranks_send_to_idx[0] = oldranks[is_indices[0]];
4136     } else {
4137       PetscInt    idxs[1];
4138       PetscMPIInt tag;
4139       MPI_Request *reqs;
4140 
4141       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
4142       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
4143       for (i=rstart;i<rend;i++) {
4144         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
4145       }
4146       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
4147       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4148       ierr = PetscFree(reqs);CHKERRQ(ierr);
4149       ranks_send_to_idx[0] = oldranks[idxs[0]];
4150     }
4151     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
4152     /* clean up */
4153     ierr = PetscFree(oldranks);CHKERRQ(ierr);
4154     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
4155     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
4156     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
4157   }
4158   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4159 
4160   /* assemble parallel IS for sends */
4161   i = 1;
4162   if (color) i=0;
4163   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
4164   /* get back IS */
4165   *is_sends = ranks_send_to;
4166   PetscFunctionReturn(0);
4167 }
4168 
4169 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
4170 
4171 #undef __FUNCT__
4172 #define __FUNCT__ "MatISSubassemble"
4173 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
4174 {
4175   Mat                    local_mat;
4176   IS                     is_sends_internal;
4177   PetscInt               rows,cols,new_local_rows;
4178   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
4179   PetscBool              ismatis,isdense,newisdense,destroy_mat;
4180   ISLocalToGlobalMapping l2gmap;
4181   PetscInt*              l2gmap_indices;
4182   const PetscInt*        is_indices;
4183   MatType                new_local_type;
4184   /* buffers */
4185   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
4186   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
4187   PetscInt               *recv_buffer_idxs_local;
4188   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
4189   /* MPI */
4190   MPI_Comm               comm,comm_n;
4191   PetscSubcomm           subcomm;
4192   PetscMPIInt            n_sends,n_recvs,commsize;
4193   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
4194   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
4195   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
4196   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
4197   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
4198   PetscErrorCode         ierr;
4199 
4200   PetscFunctionBegin;
4201   /* TODO: add missing checks */
4202   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
4203   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
4204   PetscValidLogicalCollectiveEnum(mat,reuse,5);
4205   PetscValidLogicalCollectiveInt(mat,nis,7);
4206   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
4207   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
4208   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4209   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
4210   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
4211   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
4212   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
4213   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
4214     PetscInt mrows,mcols,mnrows,mncols;
4215     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
4216     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
4217     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
4218     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
4219     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
4220     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
4221   }
4222   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
4223   PetscValidLogicalCollectiveInt(mat,bs,0);
4224   /* prepare IS for sending if not provided */
4225   if (!is_sends) {
4226     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
4227     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
4228   } else {
4229     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
4230     is_sends_internal = is_sends;
4231   }
4232 
4233   /* get comm */
4234   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
4235 
4236   /* compute number of sends */
4237   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
4238   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
4239 
4240   /* compute number of receives */
4241   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
4242   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
4243   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
4244   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4245   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
4246   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
4247   ierr = PetscFree(iflags);CHKERRQ(ierr);
4248 
4249   /* restrict comm if requested */
4250   subcomm = 0;
4251   destroy_mat = PETSC_FALSE;
4252   if (restrict_comm) {
4253     PetscMPIInt color,subcommsize;
4254 
4255     color = 0;
4256     if (restrict_full) {
4257       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
4258     } else {
4259       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
4260     }
4261     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
4262     subcommsize = commsize - subcommsize;
4263     /* check if reuse has been requested */
4264     if (reuse == MAT_REUSE_MATRIX) {
4265       if (*mat_n) {
4266         PetscMPIInt subcommsize2;
4267         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
4268         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
4269         comm_n = PetscObjectComm((PetscObject)*mat_n);
4270       } else {
4271         comm_n = PETSC_COMM_SELF;
4272       }
4273     } else { /* MAT_INITIAL_MATRIX */
4274       PetscMPIInt rank;
4275 
4276       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4277       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4278       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4279       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4280       comm_n = PetscSubcommChild(subcomm);
4281     }
4282     /* flag to destroy *mat_n if not significative */
4283     if (color) destroy_mat = PETSC_TRUE;
4284   } else {
4285     comm_n = comm;
4286   }
4287 
4288   /* prepare send/receive buffers */
4289   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4290   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4291   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4292   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4293   if (nis) {
4294     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4295   }
4296 
4297   /* Get data from local matrices */
4298   if (!isdense) {
4299     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4300     /* TODO: See below some guidelines on how to prepare the local buffers */
4301     /*
4302        send_buffer_vals should contain the raw values of the local matrix
4303        send_buffer_idxs should contain:
4304        - MatType_PRIVATE type
4305        - PetscInt        size_of_l2gmap
4306        - PetscInt        global_row_indices[size_of_l2gmap]
4307        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4308     */
4309   } else {
4310     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4311     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4312     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4313     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4314     send_buffer_idxs[1] = i;
4315     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4316     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4317     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4318     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4319     for (i=0;i<n_sends;i++) {
4320       ilengths_vals[is_indices[i]] = len*len;
4321       ilengths_idxs[is_indices[i]] = len+2;
4322     }
4323   }
4324   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4325   /* additional is (if any) */
4326   if (nis) {
4327     PetscMPIInt psum;
4328     PetscInt j;
4329     for (j=0,psum=0;j<nis;j++) {
4330       PetscInt plen;
4331       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4332       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4333       psum += len+1; /* indices + lenght */
4334     }
4335     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4336     for (j=0,psum=0;j<nis;j++) {
4337       PetscInt plen;
4338       const PetscInt *is_array_idxs;
4339       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4340       send_buffer_idxs_is[psum] = plen;
4341       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4342       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4343       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4344       psum += plen+1; /* indices + lenght */
4345     }
4346     for (i=0;i<n_sends;i++) {
4347       ilengths_idxs_is[is_indices[i]] = psum;
4348     }
4349     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4350   }
4351 
4352   buf_size_idxs = 0;
4353   buf_size_vals = 0;
4354   buf_size_idxs_is = 0;
4355   for (i=0;i<n_recvs;i++) {
4356     buf_size_idxs += (PetscInt)olengths_idxs[i];
4357     buf_size_vals += (PetscInt)olengths_vals[i];
4358     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4359   }
4360   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4361   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4362   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4363 
4364   /* get new tags for clean communications */
4365   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4366   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4367   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4368 
4369   /* allocate for requests */
4370   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4371   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4372   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4373   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4374   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4375   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4376 
4377   /* communications */
4378   ptr_idxs = recv_buffer_idxs;
4379   ptr_vals = recv_buffer_vals;
4380   ptr_idxs_is = recv_buffer_idxs_is;
4381   for (i=0;i<n_recvs;i++) {
4382     source_dest = onodes[i];
4383     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4384     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4385     ptr_idxs += olengths_idxs[i];
4386     ptr_vals += olengths_vals[i];
4387     if (nis) {
4388       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4389       ptr_idxs_is += olengths_idxs_is[i];
4390     }
4391   }
4392   for (i=0;i<n_sends;i++) {
4393     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4394     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4395     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4396     if (nis) {
4397       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4398     }
4399   }
4400   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4401   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4402 
4403   /* assemble new l2g map */
4404   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4405   ptr_idxs = recv_buffer_idxs;
4406   new_local_rows = 0;
4407   for (i=0;i<n_recvs;i++) {
4408     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4409     ptr_idxs += olengths_idxs[i];
4410   }
4411   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4412   ptr_idxs = recv_buffer_idxs;
4413   new_local_rows = 0;
4414   for (i=0;i<n_recvs;i++) {
4415     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4416     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4417     ptr_idxs += olengths_idxs[i];
4418   }
4419   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4420   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4421   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4422 
4423   /* infer new local matrix type from received local matrices type */
4424   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4425   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4426   if (n_recvs) {
4427     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4428     ptr_idxs = recv_buffer_idxs;
4429     for (i=0;i<n_recvs;i++) {
4430       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4431         new_local_type_private = MATAIJ_PRIVATE;
4432         break;
4433       }
4434       ptr_idxs += olengths_idxs[i];
4435     }
4436     switch (new_local_type_private) {
4437       case MATDENSE_PRIVATE:
4438         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4439           new_local_type = MATSEQAIJ;
4440           bs = 1;
4441         } else { /* if I receive only 1 dense matrix */
4442           new_local_type = MATSEQDENSE;
4443           bs = 1;
4444         }
4445         break;
4446       case MATAIJ_PRIVATE:
4447         new_local_type = MATSEQAIJ;
4448         bs = 1;
4449         break;
4450       case MATBAIJ_PRIVATE:
4451         new_local_type = MATSEQBAIJ;
4452         break;
4453       case MATSBAIJ_PRIVATE:
4454         new_local_type = MATSEQSBAIJ;
4455         break;
4456       default:
4457         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4458         break;
4459     }
4460   } else { /* by default, new_local_type is seqdense */
4461     new_local_type = MATSEQDENSE;
4462     bs = 1;
4463   }
4464 
4465   /* create MATIS object if needed */
4466   if (reuse == MAT_INITIAL_MATRIX) {
4467     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4468     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4469   } else {
4470     /* it also destroys the local matrices */
4471     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4472   }
4473   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4474   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4475 
4476   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4477 
4478   /* Global to local map of received indices */
4479   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4480   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4481   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4482 
4483   /* restore attributes -> type of incoming data and its size */
4484   buf_size_idxs = 0;
4485   for (i=0;i<n_recvs;i++) {
4486     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4487     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4488     buf_size_idxs += (PetscInt)olengths_idxs[i];
4489   }
4490   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4491 
4492   /* set preallocation */
4493   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4494   if (!newisdense) {
4495     PetscInt *new_local_nnz=0;
4496 
4497     ptr_vals = recv_buffer_vals;
4498     ptr_idxs = recv_buffer_idxs_local;
4499     if (n_recvs) {
4500       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4501     }
4502     for (i=0;i<n_recvs;i++) {
4503       PetscInt j;
4504       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4505         for (j=0;j<*(ptr_idxs+1);j++) {
4506           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4507         }
4508       } else {
4509         /* TODO */
4510       }
4511       ptr_idxs += olengths_idxs[i];
4512     }
4513     if (new_local_nnz) {
4514       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4515       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4516       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4517       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4518       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4519       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4520     } else {
4521       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4522     }
4523     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4524   } else {
4525     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4526   }
4527 
4528   /* set values */
4529   ptr_vals = recv_buffer_vals;
4530   ptr_idxs = recv_buffer_idxs_local;
4531   for (i=0;i<n_recvs;i++) {
4532     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4533       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4534       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4535       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4536       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4537       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4538     } else {
4539       /* TODO */
4540     }
4541     ptr_idxs += olengths_idxs[i];
4542     ptr_vals += olengths_vals[i];
4543   }
4544   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4545   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4546   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4547   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4548   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4549   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4550 
4551 #if 0
4552   if (!restrict_comm) { /* check */
4553     Vec       lvec,rvec;
4554     PetscReal infty_error;
4555 
4556     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4557     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4558     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4559     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4560     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4561     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4562     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4563     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4564     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4565   }
4566 #endif
4567 
4568   /* assemble new additional is (if any) */
4569   if (nis) {
4570     PetscInt **temp_idxs,*count_is,j,psum;
4571 
4572     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4573     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4574     ptr_idxs = recv_buffer_idxs_is;
4575     psum = 0;
4576     for (i=0;i<n_recvs;i++) {
4577       for (j=0;j<nis;j++) {
4578         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4579         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4580         psum += plen;
4581         ptr_idxs += plen+1; /* shift pointer to received data */
4582       }
4583     }
4584     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4585     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4586     for (i=1;i<nis;i++) {
4587       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4588     }
4589     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4590     ptr_idxs = recv_buffer_idxs_is;
4591     for (i=0;i<n_recvs;i++) {
4592       for (j=0;j<nis;j++) {
4593         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4594         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4595         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4596         ptr_idxs += plen+1; /* shift pointer to received data */
4597       }
4598     }
4599     for (i=0;i<nis;i++) {
4600       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4601       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4602       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4603     }
4604     ierr = PetscFree(count_is);CHKERRQ(ierr);
4605     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4606     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4607   }
4608   /* free workspace */
4609   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4610   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4611   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4612   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4613   if (isdense) {
4614     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4615     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4616   } else {
4617     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4618   }
4619   if (nis) {
4620     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4621     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4622   }
4623   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4624   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4625   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4626   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4627   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4628   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4629   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4630   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4631   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4632   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4633   ierr = PetscFree(onodes);CHKERRQ(ierr);
4634   if (nis) {
4635     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4636     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4637     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4638   }
4639   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4640   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4641     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4642     for (i=0;i<nis;i++) {
4643       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4644     }
4645     *mat_n = NULL;
4646   }
4647   PetscFunctionReturn(0);
4648 }
4649 
4650 /* temporary hack into ksp private data structure */
4651 #include <petsc/private/kspimpl.h>
4652 
4653 #undef __FUNCT__
4654 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4655 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4656 {
4657   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4658   PC_IS                  *pcis = (PC_IS*)pc->data;
4659   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4660   MatNullSpace           CoarseNullSpace=NULL;
4661   ISLocalToGlobalMapping coarse_islg;
4662   IS                     coarse_is,*isarray;
4663   PetscInt               i,im_active=-1,active_procs=-1;
4664   PetscInt               nis,nisdofs,nisneu;
4665   PC                     pc_temp;
4666   PCType                 coarse_pc_type;
4667   KSPType                coarse_ksp_type;
4668   PetscBool              multilevel_requested,multilevel_allowed;
4669   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4670   Mat                    t_coarse_mat_is;
4671   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4672   PetscMPIInt            all_procs;
4673   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4674   PetscBool              compute_vecs = PETSC_FALSE;
4675   PetscScalar            *array;
4676   PetscErrorCode         ierr;
4677 
4678   PetscFunctionBegin;
4679   /* Assign global numbering to coarse dofs */
4680   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4681     PetscInt ocoarse_size;
4682     compute_vecs = PETSC_TRUE;
4683     ocoarse_size = pcbddc->coarse_size;
4684     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4685     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4686     /* see if we can avoid some work */
4687     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4688       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4689       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4690         PC        pc;
4691         PetscBool isbddc;
4692 
4693         /* temporary workaround since PCBDDC does not have a reset method so far */
4694         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4695         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4696         if (isbddc) {
4697           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4698         }
4699         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4700         coarse_reuse = PETSC_FALSE;
4701       } else { /* we can safely reuse already computed coarse matrix */
4702         coarse_reuse = PETSC_TRUE;
4703       }
4704     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4705       coarse_reuse = PETSC_FALSE;
4706     }
4707     /* reset any subassembling information */
4708     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4709     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4710   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4711     coarse_reuse = PETSC_TRUE;
4712   }
4713 
4714   /* count "active" (i.e. with positive local size) and "void" processes */
4715   im_active = !!(pcis->n);
4716   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4717   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4718   void_procs = all_procs-active_procs;
4719   csin_type_simple = PETSC_TRUE;
4720   redist = PETSC_FALSE;
4721   if (pcbddc->current_level && void_procs) {
4722     csin_ml = PETSC_TRUE;
4723     ncoarse_ml = void_procs;
4724     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4725     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4726       csin_ds = PETSC_TRUE;
4727       ncoarse_ds = pcbddc->redistribute_coarse;
4728       redist = PETSC_TRUE;
4729     } else {
4730       csin_ds = PETSC_TRUE;
4731       ncoarse_ds = active_procs;
4732       redist = PETSC_TRUE;
4733     }
4734   } else {
4735     csin_ml = PETSC_FALSE;
4736     ncoarse_ml = all_procs;
4737     if (void_procs) {
4738       csin_ds = PETSC_TRUE;
4739       ncoarse_ds = void_procs;
4740       csin_type_simple = PETSC_FALSE;
4741     } else {
4742       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4743         csin_ds = PETSC_TRUE;
4744         ncoarse_ds = pcbddc->redistribute_coarse;
4745         redist = PETSC_TRUE;
4746       } else {
4747         csin_ds = PETSC_FALSE;
4748         ncoarse_ds = all_procs;
4749       }
4750     }
4751   }
4752 
4753   /*
4754     test if we can go multilevel: three conditions must be satisfied:
4755     - we have not exceeded the number of levels requested
4756     - we can actually subassemble the active processes
4757     - we can find a suitable number of MPI processes where we can place the subassembled problem
4758   */
4759   multilevel_allowed = PETSC_FALSE;
4760   multilevel_requested = PETSC_FALSE;
4761   if (pcbddc->current_level < pcbddc->max_levels) {
4762     multilevel_requested = PETSC_TRUE;
4763     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4764       multilevel_allowed = PETSC_FALSE;
4765     } else {
4766       multilevel_allowed = PETSC_TRUE;
4767     }
4768   }
4769   /* determine number of process partecipating to coarse solver */
4770   if (multilevel_allowed) {
4771     ncoarse = ncoarse_ml;
4772     csin = csin_ml;
4773     redist = PETSC_FALSE;
4774   } else {
4775     ncoarse = ncoarse_ds;
4776     csin = csin_ds;
4777   }
4778 
4779   /* creates temporary l2gmap and IS for coarse indexes */
4780   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4781   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4782 
4783   /* creates temporary MATIS object for coarse matrix */
4784   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4785   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4786   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4787   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4788 #if 0
4789   {
4790     PetscViewer viewer;
4791     char filename[256];
4792     sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4793     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4794     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4795     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4796     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4797   }
4798 #endif
4799   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4800   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4801   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4802   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4803   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4804 
4805   /* compute dofs splitting and neumann boundaries for coarse dofs */
4806   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4807     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4808     const PetscInt         *idxs;
4809     ISLocalToGlobalMapping tmap;
4810 
4811     /* create map between primal indices (in local representative ordering) and local primal numbering */
4812     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4813     /* allocate space for temporary storage */
4814     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4815     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4816     /* allocate for IS array */
4817     nisdofs = pcbddc->n_ISForDofsLocal;
4818     nisneu = !!pcbddc->NeumannBoundariesLocal;
4819     nis = nisdofs + nisneu;
4820     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4821     /* dofs splitting */
4822     for (i=0;i<nisdofs;i++) {
4823       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4824       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4825       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4826       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4827       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4828       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4829       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4830       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4831     }
4832     /* neumann boundaries */
4833     if (pcbddc->NeumannBoundariesLocal) {
4834       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4835       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4836       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4837       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4838       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4839       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4840       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4841       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4842     }
4843     /* free memory */
4844     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4845     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4846     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4847   } else {
4848     nis = 0;
4849     nisdofs = 0;
4850     nisneu = 0;
4851     isarray = NULL;
4852   }
4853   /* destroy no longer needed map */
4854   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4855 
4856   /* restrict on coarse candidates (if needed) */
4857   coarse_mat_is = NULL;
4858   if (csin) {
4859     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4860       if (redist) {
4861         PetscMPIInt rank;
4862         PetscInt    spc,n_spc_p1,dest[1],destsize;
4863 
4864         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4865         spc = active_procs/ncoarse;
4866         n_spc_p1 = active_procs%ncoarse;
4867         if (im_active) {
4868           destsize = 1;
4869           if (rank > n_spc_p1*(spc+1)-1) {
4870             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4871           } else {
4872             dest[0] = rank/(spc+1);
4873           }
4874         } else {
4875           destsize = 0;
4876         }
4877         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4878       } else if (csin_type_simple) {
4879         PetscMPIInt rank;
4880         PetscInt    issize,isidx;
4881 
4882         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4883         if (im_active) {
4884           issize = 1;
4885           isidx = (PetscInt)rank;
4886         } else {
4887           issize = 0;
4888           isidx = -1;
4889         }
4890         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4891       } else { /* get a suitable subassembling pattern from MATIS code */
4892         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4893       }
4894 
4895       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4896       if (!redist || ncoarse <= void_procs) {
4897         PetscInt ncoarse_cand,tissize,*nisindices;
4898         PetscInt *coarse_candidates;
4899         const PetscInt* tisindices;
4900 
4901         /* get coarse candidates' ranks in pc communicator */
4902         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4903         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4904         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4905           if (!coarse_candidates[i]) {
4906             coarse_candidates[ncoarse_cand++]=i;
4907           }
4908         }
4909         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4910 
4911 
4912         if (pcbddc->dbg_flag) {
4913           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4914           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4915           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4916           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4917           for (i=0;i<ncoarse_cand;i++) {
4918             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4919           }
4920           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4921           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4922         }
4923         /* shift the pattern on coarse candidates */
4924         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4925         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4926         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4927         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4928         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4929         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4930         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4931       }
4932       if (pcbddc->dbg_flag) {
4933         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4934         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4935         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4936         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4937       }
4938     }
4939     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4940     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4941       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4942     } else { /* this is the last level, so use just receiving processes in subcomm */
4943       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4944     }
4945   } else {
4946     if (pcbddc->dbg_flag) {
4947       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4948       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4949       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4950     }
4951     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4952     coarse_mat_is = t_coarse_mat_is;
4953   }
4954 
4955   /* create local to global scatters for coarse problem */
4956   if (compute_vecs) {
4957     PetscInt lrows;
4958     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4959     if (coarse_mat_is) {
4960       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4961     } else {
4962       lrows = 0;
4963     }
4964     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4965     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4966     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4967     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4968     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4969   }
4970   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4971   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4972 
4973   /* set defaults for coarse KSP and PC */
4974   if (multilevel_allowed) {
4975     coarse_ksp_type = KSPRICHARDSON;
4976     coarse_pc_type = PCBDDC;
4977   } else {
4978     coarse_ksp_type = KSPPREONLY;
4979     coarse_pc_type = PCREDUNDANT;
4980   }
4981 
4982   /* print some info if requested */
4983   if (pcbddc->dbg_flag) {
4984     if (!multilevel_allowed) {
4985       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4986       if (multilevel_requested) {
4987         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4988       } else if (pcbddc->max_levels) {
4989         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4990       }
4991       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4992     }
4993   }
4994 
4995   /* create the coarse KSP object only once with defaults */
4996   if (coarse_mat_is) {
4997     MatReuse coarse_mat_reuse;
4998     PetscViewer dbg_viewer = NULL;
4999     if (pcbddc->dbg_flag) {
5000       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
5001       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5002     }
5003     if (!pcbddc->coarse_ksp) {
5004       char prefix[256],str_level[16];
5005       size_t len;
5006       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
5007       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5008       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
5009       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
5010       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
5011       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
5012       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
5013       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5014       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5015       /* prefix */
5016       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
5017       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
5018       if (!pcbddc->current_level) {
5019         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
5020         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
5021       } else {
5022         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
5023         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5024         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5025         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
5026         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
5027         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
5028       }
5029       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
5030       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5031       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
5032       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
5033       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
5034       /* allow user customization */
5035       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
5036     }
5037     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
5038     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
5039     if (nisdofs) {
5040       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
5041       for (i=0;i<nisdofs;i++) {
5042         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
5043       }
5044     }
5045     if (nisneu) {
5046       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
5047       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
5048     }
5049 
5050     /* get some info after set from options */
5051     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
5052     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
5053     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
5054     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
5055       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
5056       isbddc = PETSC_FALSE;
5057     }
5058     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
5059     if (isredundant) {
5060       KSP inner_ksp;
5061       PC  inner_pc;
5062       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
5063       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
5064       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
5065     }
5066 
5067     /* assemble coarse matrix */
5068     if (coarse_reuse) {
5069       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
5070       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
5071       coarse_mat_reuse = MAT_REUSE_MATRIX;
5072     } else {
5073       coarse_mat_reuse = MAT_INITIAL_MATRIX;
5074     }
5075     if (isbddc || isnn) {
5076       if (pcbddc->coarsening_ratio > 1) {
5077         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
5078           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
5079           if (pcbddc->dbg_flag) {
5080             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5081             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
5082             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
5083             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5084           }
5085         }
5086         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
5087       } else {
5088         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
5089         coarse_mat = coarse_mat_is;
5090       }
5091     } else {
5092       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
5093     }
5094     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
5095 
5096     /* propagate symmetry info of coarse matrix */
5097     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
5098     if (pc->pmat->symmetric_set) {
5099       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
5100     }
5101     if (pc->pmat->hermitian_set) {
5102       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
5103     }
5104     if (pc->pmat->spd_set) {
5105       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
5106     }
5107     /* set operators */
5108     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5109     if (pcbddc->dbg_flag) {
5110       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
5111     }
5112   } else { /* processes non partecipating to coarse solver (if any) */
5113     coarse_mat = 0;
5114   }
5115   ierr = PetscFree(isarray);CHKERRQ(ierr);
5116 #if 0
5117   {
5118     PetscViewer viewer;
5119     char filename[256];
5120     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
5121     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
5122     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
5123     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
5124     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
5125   }
5126 #endif
5127 
5128   /* Compute coarse null space (special handling by BDDC only) */
5129 #if 0
5130   if (pcbddc->NullSpace) {
5131     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
5132   }
5133 #endif
5134   /* hack */
5135   if (pcbddc->coarse_ksp) {
5136     Vec crhs,csol;
5137 
5138     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
5139     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
5140     if (!csol) {
5141       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
5142     }
5143     if (!crhs) {
5144       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
5145     }
5146   }
5147 
5148   /* compute null space for coarse solver if the benign trick has been requested */
5149   if (pcbddc->benign_null) {
5150 
5151     ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr);
5152     for (i=0;i<pcbddc->benign_n;i++) {
5153       ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr);
5154     }
5155     ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
5156     ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
5157     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5158     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5159     if (coarse_mat) {
5160       Vec         nullv;
5161       PetscScalar *array,*array2;
5162       PetscInt    nl;
5163 
5164       ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr);
5165       ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr);
5166       ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5167       ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr);
5168       ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr);
5169       ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr);
5170       ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
5171       ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr);
5172       ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr);
5173       ierr = VecDestroy(&nullv);CHKERRQ(ierr);
5174     }
5175   }
5176 
5177   if (pcbddc->coarse_ksp) {
5178     PetscBool ispreonly;
5179 
5180     if (CoarseNullSpace) {
5181       PetscBool isnull;
5182       ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr);
5183       if (isnull) {
5184         if (isbddc) {
5185           ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
5186         } else {
5187           ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
5188         }
5189       } else {
5190         ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5191       }
5192     }
5193     /* setup coarse ksp */
5194     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
5195     /* Check coarse problem if in debug mode or if solving with an iterative method */
5196     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
5197     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
5198       KSP       check_ksp;
5199       KSPType   check_ksp_type;
5200       PC        check_pc;
5201       Vec       check_vec,coarse_vec;
5202       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
5203       PetscInt  its;
5204       PetscBool compute_eigs;
5205       PetscReal *eigs_r,*eigs_c;
5206       PetscInt  neigs;
5207       const char *prefix;
5208 
5209       /* Create ksp object suitable for estimation of extreme eigenvalues */
5210       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
5211       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
5212       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
5213       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
5214       if (ispreonly) {
5215         check_ksp_type = KSPPREONLY;
5216         compute_eigs = PETSC_FALSE;
5217       } else {
5218         check_ksp_type = KSPGMRES;
5219         compute_eigs = PETSC_TRUE;
5220       }
5221       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
5222       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
5223       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
5224       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
5225       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
5226       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
5227       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
5228       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
5229       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
5230       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
5231       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
5232       /* create random vec */
5233       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
5234       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
5235       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
5236       if (CoarseNullSpace) {
5237         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
5238       }
5239       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5240       /* solve coarse problem */
5241       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
5242       if (CoarseNullSpace) {
5243         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
5244       }
5245       /* set eigenvalue estimation if preonly has not been requested */
5246       if (compute_eigs) {
5247         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
5248         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
5249         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
5250         lambda_max = eigs_r[neigs-1];
5251         lambda_min = eigs_r[0];
5252         if (pcbddc->use_coarse_estimates) {
5253           if (lambda_max>lambda_min) {
5254             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
5255             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
5256           }
5257         }
5258       }
5259 
5260       /* check coarse problem residual error */
5261       if (pcbddc->dbg_flag) {
5262         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
5263         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5264         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
5265         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
5266         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5267         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
5268         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
5269         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
5270         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
5271         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
5272         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
5273         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
5274         if (CoarseNullSpace) {
5275           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr);
5276         }
5277         if (compute_eigs) {
5278           PetscReal lambda_max_s,lambda_min_s;
5279           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
5280           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
5281           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
5282           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
5283           for (i=0;i<neigs;i++) {
5284             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
5285           }
5286         }
5287         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5288         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5289       }
5290       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
5291       if (compute_eigs) {
5292         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
5293         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
5294       }
5295     }
5296   }
5297   /* print additional info */
5298   if (pcbddc->dbg_flag) {
5299     /* waits until all processes reaches this point */
5300     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
5301     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
5302     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5303   }
5304 
5305   /* free memory */
5306   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5307   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
5308   PetscFunctionReturn(0);
5309 }
5310 
5311 #undef __FUNCT__
5312 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
5313 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
5314 {
5315   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5316   PC_IS*         pcis = (PC_IS*)pc->data;
5317   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5318   IS             subset,subset_mult,subset_n;
5319   PetscInt       local_size,coarse_size=0;
5320   PetscInt       *local_primal_indices=NULL;
5321   const PetscInt *t_local_primal_indices;
5322   PetscErrorCode ierr;
5323 
5324   PetscFunctionBegin;
5325   /* Compute global number of coarse dofs */
5326   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5327     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5328   }
5329   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5330   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5331   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5332   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5333   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5334   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5335   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5336   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5337   if (local_size != pcbddc->local_primal_size) {
5338     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5339   }
5340   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5341   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5342   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5343   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5344   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5345 
5346   /* check numbering */
5347   if (pcbddc->dbg_flag) {
5348     PetscScalar coarsesum,*array,*array2;
5349     PetscInt    i;
5350     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5351 
5352     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5353     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5354     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5355     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
5356     /* counter */
5357     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5358     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5359     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5360     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5361     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5362     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5363 
5364     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5365     for (i=0;i<pcbddc->local_primal_size;i++) {
5366       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5367     }
5368     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5369     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5370     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5371     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5372     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5373     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5374     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5375     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5376     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5377     for (i=0;i<pcis->n;i++) {
5378       if (array[i] != 0.0 && array[i] != array2[i]) {
5379         PetscInt owned = (PetscInt)PetscRealPart(array[i]);
5380         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
5381         set_error = PETSC_TRUE;
5382         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5383       }
5384     }
5385     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5386     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5387     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5388     for (i=0;i<pcis->n;i++) {
5389       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5390     }
5391     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5392     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5393     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5394     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5395     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5396     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5397     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5398       PetscInt *gidxs;
5399 
5400       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5401       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5402       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5403       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5404       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5405       for (i=0;i<pcbddc->local_primal_size;i++) {
5406         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5407       }
5408       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5409       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5410     }
5411     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5412     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5413   }
5414   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5415   /* get back data */
5416   *coarse_size_n = coarse_size;
5417   *local_primal_indices_n = local_primal_indices;
5418   PetscFunctionReturn(0);
5419 }
5420 
5421 #undef __FUNCT__
5422 #define __FUNCT__ "PCBDDCGlobalToLocal"
5423 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5424 {
5425   IS             localis_t;
5426   PetscInt       i,lsize,*idxs,n;
5427   PetscScalar    *vals;
5428   PetscErrorCode ierr;
5429 
5430   PetscFunctionBegin;
5431   /* get indices in local ordering exploiting local to global map */
5432   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5433   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5434   for (i=0;i<lsize;i++) vals[i] = 1.0;
5435   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5436   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5437   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5438   if (idxs) { /* multilevel guard */
5439     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5440   }
5441   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5442   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5443   ierr = PetscFree(vals);CHKERRQ(ierr);
5444   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5445   /* now compute set in local ordering */
5446   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5447   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5448   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5449   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5450   for (i=0,lsize=0;i<n;i++) {
5451     if (PetscRealPart(vals[i]) > 0.5) {
5452       lsize++;
5453     }
5454   }
5455   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5456   for (i=0,lsize=0;i<n;i++) {
5457     if (PetscRealPart(vals[i]) > 0.5) {
5458       idxs[lsize++] = i;
5459     }
5460   }
5461   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5462   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5463   *localis = localis_t;
5464   PetscFunctionReturn(0);
5465 }
5466 
5467 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5468 #undef __FUNCT__
5469 #define __FUNCT__ "PCBDDCMatMult_Private"
5470 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5471 {
5472   PCBDDCChange_ctx change_ctx;
5473   PetscErrorCode   ierr;
5474 
5475   PetscFunctionBegin;
5476   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5477   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5478   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5479   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5480   PetscFunctionReturn(0);
5481 }
5482 
5483 #undef __FUNCT__
5484 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5485 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5486 {
5487   PCBDDCChange_ctx change_ctx;
5488   PetscErrorCode   ierr;
5489 
5490   PetscFunctionBegin;
5491   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5492   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5493   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5494   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5495   PetscFunctionReturn(0);
5496 }
5497 
5498 #undef __FUNCT__
5499 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5500 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5501 {
5502   PC_IS               *pcis=(PC_IS*)pc->data;
5503   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5504   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5505   Mat                 S_j;
5506   PetscInt            *used_xadj,*used_adjncy;
5507   PetscBool           free_used_adj;
5508   PetscErrorCode      ierr;
5509 
5510   PetscFunctionBegin;
5511   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5512   free_used_adj = PETSC_FALSE;
5513   if (pcbddc->sub_schurs_layers == -1) {
5514     used_xadj = NULL;
5515     used_adjncy = NULL;
5516   } else {
5517     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5518       used_xadj = pcbddc->mat_graph->xadj;
5519       used_adjncy = pcbddc->mat_graph->adjncy;
5520     } else if (pcbddc->computed_rowadj) {
5521       used_xadj = pcbddc->mat_graph->xadj;
5522       used_adjncy = pcbddc->mat_graph->adjncy;
5523     } else {
5524       PetscBool      flg_row=PETSC_FALSE;
5525       const PetscInt *xadj,*adjncy;
5526       PetscInt       nvtxs;
5527 
5528       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5529       if (flg_row) {
5530         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5531         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5532         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5533         free_used_adj = PETSC_TRUE;
5534       } else {
5535         pcbddc->sub_schurs_layers = -1;
5536         used_xadj = NULL;
5537         used_adjncy = NULL;
5538       }
5539       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5540     }
5541   }
5542 
5543   /* setup sub_schurs data */
5544   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5545   if (!sub_schurs->use_mumps) {
5546     /* pcbddc->ksp_D up to date only if not using MUMPS */
5547     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5548     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5549   } else {
5550     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5551     PetscBool isseqaij;
5552     if (!pcbddc->use_vertices && reuse_solvers) {
5553       PetscInt n_vertices;
5554 
5555       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5556       reuse_solvers = (PetscBool)!n_vertices;
5557     }
5558     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5559     if (!isseqaij) {
5560       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5561       if (matis->A == pcbddc->local_mat) {
5562         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5563         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5564       } else {
5565         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5566       }
5567     }
5568     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5569   }
5570   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5571 
5572   /* free adjacency */
5573   if (free_used_adj) {
5574     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5575   }
5576   PetscFunctionReturn(0);
5577 }
5578 
5579 #undef __FUNCT__
5580 #define __FUNCT__ "PCBDDCInitSubSchurs"
5581 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5582 {
5583   PC_IS               *pcis=(PC_IS*)pc->data;
5584   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5585   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5586   PCBDDCGraph         graph;
5587   PetscErrorCode      ierr;
5588 
5589   PetscFunctionBegin;
5590   /* attach interface graph for determining subsets */
5591   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5592     IS       verticesIS,verticescomm;
5593     PetscInt vsize,*idxs;
5594 
5595     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5596     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5597     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5598     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5599     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5600     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5601     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5602     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5603     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5604     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5605     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5606 /*
5607     if (pcbddc->dbg_flag) {
5608       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5609     }
5610 */
5611   } else {
5612     graph = pcbddc->mat_graph;
5613   }
5614 
5615   /* sub_schurs init */
5616   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5617 
5618   /* free graph struct */
5619   if (pcbddc->sub_schurs_rebuild) {
5620     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5621   }
5622   PetscFunctionReturn(0);
5623 }
5624 
5625 #undef __FUNCT__
5626 #define __FUNCT__ "PCBDDCCheckOperator"
5627 PetscErrorCode PCBDDCCheckOperator(PC pc)
5628 {
5629   PC_IS               *pcis=(PC_IS*)pc->data;
5630   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5631   PetscErrorCode      ierr;
5632 
5633   PetscFunctionBegin;
5634   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5635     IS             zerodiag = NULL;
5636     Mat            S_j,B0_B=NULL;
5637     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5638     PetscScalar    *p0_check,*array,*array2;
5639     PetscReal      norm;
5640     PetscInt       i;
5641 
5642     /* B0 and B0_B */
5643     if (zerodiag) {
5644       IS       dummy;
5645 
5646       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr);
5647       ierr = MatGetSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5648       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5649       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5650     }
5651     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5652     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5653     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5654     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5655     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5656     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5657     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5658     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5659     /* S_j */
5660     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5661     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5662 
5663     /* mimic vector in \widetilde{W}_\Gamma */
5664     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5665     /* continuous in primal space */
5666     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5667     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5668     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5669     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5670     ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr);
5671     for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
5672     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5673     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5674     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5675     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5676     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5677     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5678     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5679     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5680 
5681     /* assemble rhs for coarse problem */
5682     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5683     /* local with Schur */
5684     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5685     if (zerodiag) {
5686       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5687       for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
5688       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5689       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5690     }
5691     /* sum on primal nodes the local contributions */
5692     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5693     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5694     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5695     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5696     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5697     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5698     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5699     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5700     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5701     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5702     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5703     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5704     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5705     /* scale primal nodes (BDDC sums contibutions) */
5706     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5707     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5708     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5709     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5710     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5711     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5712     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5713     /* global: \widetilde{B0}_B w_\Gamma */
5714     if (zerodiag) {
5715       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5716       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5717       for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
5718       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5719     }
5720     /* BDDC */
5721     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5722     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5723 
5724     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5725     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5726     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5727     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5728     for (i=0;i<pcbddc->benign_n;i++) {
5729       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
5730     }
5731     ierr = PetscFree(p0_check);CHKERRQ(ierr);
5732     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5733     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5734     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5735     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5736     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5737   }
5738   PetscFunctionReturn(0);
5739 }
5740