xref: /petsc/src/ksp/pc/impls/gamg/agg.c (revision 40cbb1a031ea8f2be4fe2b92dc842b003ad37be3)
/*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
 */
4 
5 #include <../src/ksp/pc/impls/gamg/gamg.h>        /*I "petscpc.h" I*/
6 #include <petscblaslapack.h>
7 #include <petscdm.h>
8 #include <petsc/private/kspimpl.h>
9 
/* Private context for the smoothed-aggregation (AGG) variant of GAMG */
typedef struct {
  PetscInt  nsmooths;     /* number of prolongation-smoothing steps (option -pc_gamg_agg_nsmooths) */
  PetscBool sym_graph;    /* symmetrize the graph before aggregating (option -pc_gamg_sym_graph) */
  PetscInt  square_graph; /* number of levels on which to square the graph before aggregating (option -pc_gamg_square_graph) */
} PC_GAMG_AGG;
15 
/*@
   PCGAMGSetNSmooths - Set number of smoothing steps (1 is typical)

   Logically Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - the number of smoothing steps

   Options Database Key:
.  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation

   Level: intermediate

.seealso: `PCGAMGSetSymGraph()`, `PCGAMGSetSquareGraph()`
@*/
PetscErrorCode PCGAMGSetNSmooths(PC pc, PetscInt n)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,n,2);
  PetscTryMethod(pc,"PCGAMGSetNSmooths_C",(PC,PetscInt),(pc,n));
  PetscFunctionReturn(0);
}
39 
40 static PetscErrorCode PCGAMGSetNSmooths_AGG(PC pc, PetscInt n)
41 {
42   PC_MG       *mg          = (PC_MG*)pc->data;
43   PC_GAMG     *pc_gamg     = (PC_GAMG*)mg->innerctx;
44   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
45 
46   PetscFunctionBegin;
47   pc_gamg_agg->nsmooths = n;
48   PetscFunctionReturn(0);
49 }
50 
/*@
   PCGAMGSetSymGraph - Symmetrize the graph before computing the aggregation. Some algorithms require the graph be symmetric

   Logically Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - PETSC_TRUE to symmetrize the graph, PETSC_FALSE (the default) to use it as is

   Options Database Key:
.  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation

   Level: intermediate

.seealso: `PCGAMGSetSquareGraph()`
@*/
PetscErrorCode PCGAMGSetSymGraph(PC pc, PetscBool n)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveBool(pc,n,2);
  PetscTryMethod(pc,"PCGAMGSetSymGraph_C",(PC,PetscBool),(pc,n));
  PetscFunctionReturn(0);
}
75 
76 static PetscErrorCode PCGAMGSetSymGraph_AGG(PC pc, PetscBool n)
77 {
78   PC_MG       *mg          = (PC_MG*)pc->data;
79   PC_GAMG     *pc_gamg     = (PC_GAMG*)mg->innerctx;
80   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
81 
82   PetscFunctionBegin;
83   pc_gamg_agg->sym_graph = n;
84   PetscFunctionReturn(0);
85 }
86 
/*@
   PCGAMGSetSquareGraph -  Square the graph, ie. compute A'*A before aggregating it

   Logically Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - 0, 1 or more

   Options Database Key:
.  -pc_gamg_square_graph <n,default = 1> - number of levels to square the graph on before aggregating it

   Notes:
   Squaring the graph increases the rate of coarsening (aggressive coarsening) and thereby reduces the complexity of the coarse grids, and generally results in slower solver convergence rates. Reducing coarse grid complexity reduces the complexity of Galerkin coarse grid construction considerably.

   Level: intermediate

.seealso: `PCGAMGSetSymGraph()`, `PCGAMGSetThreshold()`
@*/
PetscErrorCode PCGAMGSetSquareGraph(PC pc, PetscInt n)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidLogicalCollectiveInt(pc,n,2);
  PetscTryMethod(pc,"PCGAMGSetSquareGraph_C",(PC,PetscInt),(pc,n));
  PetscFunctionReturn(0);
}
114 
115 static PetscErrorCode PCGAMGSetSquareGraph_AGG(PC pc, PetscInt n)
116 {
117   PC_MG       *mg          = (PC_MG*)pc->data;
118   PC_GAMG     *pc_gamg     = (PC_GAMG*)mg->innerctx;
119   PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
120 
121   PetscFunctionBegin;
122   pc_gamg_agg->square_graph = n;
123   PetscFunctionReturn(0);
124 }
125 
/* PCSetFromOptions_GAMG_AGG - process the options-database entries for the AGG variant of GAMG */
static PetscErrorCode PCSetFromOptions_GAMG_AGG(PetscOptionItems *PetscOptionsObject,PC pc)
{
  PC_MG          *mg          = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg     = (PC_GAMG*)mg->innerctx;
  PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject,"GAMG-AGG options");
  {
    /* current subcontext values are passed as the defaults, so earlier API calls
       (PCGAMGSetNSmooths() etc.) are preserved when the option is not given */
    PetscCall(PetscOptionsInt("-pc_gamg_agg_nsmooths","smoothing steps for smoothed aggregation, usually 1","PCGAMGSetNSmooths",pc_gamg_agg->nsmooths,&pc_gamg_agg->nsmooths,NULL));
    PetscCall(PetscOptionsBool("-pc_gamg_sym_graph","Set for asymmetric matrices","PCGAMGSetSymGraph",pc_gamg_agg->sym_graph,&pc_gamg_agg->sym_graph,NULL));
    PetscCall(PetscOptionsInt("-pc_gamg_square_graph","Number of levels to square graph for faster coarsening and lower coarse grid complexity","PCGAMGSetSquareGraph",pc_gamg_agg->square_graph,&pc_gamg_agg->square_graph,NULL));
  }
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}
142 
143 /* -------------------------------------------------------------------------- */
144 static PetscErrorCode PCDestroy_GAMG_AGG(PC pc)
145 {
146   PC_MG          *mg          = (PC_MG*)pc->data;
147   PC_GAMG        *pc_gamg     = (PC_GAMG*)mg->innerctx;
148 
149   PetscFunctionBegin;
150   PetscCall(PetscFree(pc_gamg->subctx));
151   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL));
152   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNSmooths_C",NULL));
153   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetSymGraph_C",NULL));
154   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetSquareGraph_C",NULL));
155   PetscFunctionReturn(0);
156 }
157 
/* -------------------------------------------------------------------------- */
/*
   PCSetCoordinates_AGG
     - collective

   Input Parameters:
   . pc - the preconditioner context
   . ndm - dimension of the coordinate data (used for dof/vertex, e.g. for Stokes)
   . a_nloc - number of local vertices
   . coords - [a_nloc][ndm] - interleaved coordinate data: {x_0, y_0, z_0, x_1, y_1, ...}
*/
169 
static PetscErrorCode PCSetCoordinates_AGG(PC pc, PetscInt ndm, PetscInt a_nloc, PetscReal *coords)
{
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
  PetscInt       arrsz,kk,ii,jj,nloc,ndatarows,ndf;
  Mat            mat = pc->pmat;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  nloc = a_nloc;

  /* SA: null space vectors */
  PetscCall(MatGetBlockSize(mat, &ndf)); /* this does not work for Stokes */
  if (coords && ndf==1) pc_gamg->data_cell_cols = 1; /* scalar w/ coords and SA (not needed) */
  else if (coords) {
    /* elasticity-style near null space: translations plus rotations built from coordinates */
    PetscCheck(ndm <= ndf,PETSC_COMM_SELF,PETSC_ERR_PLIB,"degrees of motion %" PetscInt_FMT " > block size %" PetscInt_FMT,ndm,ndf);
    pc_gamg->data_cell_cols = (ndm==2 ? 3 : 6); /* displacement elasticity */
    if (ndm != ndf) {
      PetscCheck(pc_gamg->data_cell_cols == ndf,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Don't know how to create null space for ndm=%" PetscInt_FMT ", ndf=%" PetscInt_FMT ".  Use MatSetNearNullSpace().",ndm,ndf);
    }
  } else pc_gamg->data_cell_cols = ndf; /* no data, force SA with constant null space vectors */
  pc_gamg->data_cell_rows = ndatarows = ndf;
  PetscCheck(pc_gamg->data_cell_cols > 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"pc_gamg->data_cell_cols %" PetscInt_FMT " <= 0",pc_gamg->data_cell_cols);
  arrsz = nloc*pc_gamg->data_cell_rows*pc_gamg->data_cell_cols;

  /* (re)allocate the near-null-space storage only when the required size changed */
  if (!pc_gamg->data || (pc_gamg->data_sz != arrsz)) {
    PetscCall(PetscFree(pc_gamg->data));
    PetscCall(PetscMalloc1(arrsz+1, &pc_gamg->data));
  }
  /* copy data in - column oriented */
  for (kk=0; kk<nloc; kk++) {
    const PetscInt M     = nloc*pc_gamg->data_cell_rows; /* stride into data */
    PetscReal      *data = &pc_gamg->data[kk*ndatarows]; /* start of cell */
    if (pc_gamg->data_cell_cols==1) *data = 1.0;
    else {
      /* translational modes */
      for (ii=0;ii<ndatarows;ii++) {
        for (jj=0;jj<ndatarows;jj++) {
          if (ii==jj)data[ii*M + jj] = 1.0;
          else data[ii*M + jj] = 0.0;
        }
      }

      /* rotational modes */
      if (coords) {
        if (ndm == 2) {
          /* single in-plane rotation: (-y, x) */
          data   += 2*M;
          data[0] = -coords[2*kk+1];
          data[1] =  coords[2*kk];
        } else {
          /* three rotations about the x, y and z axes (one column each) */
          data   += 3*M;
          data[0] = 0.0;             data[M+0] =  coords[3*kk+2]; data[2*M+0] = -coords[3*kk+1];
          data[1] = -coords[3*kk+2]; data[M+1] = 0.0;             data[2*M+1] =  coords[3*kk];
          data[2] =  coords[3*kk+1]; data[M+2] = -coords[3*kk];   data[2*M+2] = 0.0;
        }
      }
    }
  }
  pc_gamg->data_sz = arrsz;
  PetscFunctionReturn(0);
}
232 
233 typedef PetscInt NState;
234 static const NState NOT_DONE=-2;
235 static const NState DELETED =-1;
236 static const NState REMOVED =-3;
237 #define IS_SELECTED(s) (s!=DELETED && s!=NOT_DONE && s!=REMOVED)
238 
/* -------------------------------------------------------------------------- */
/*
   smoothAggs - greedy grab of neighbor vertices with G1 (unsquared graph) -- AIJ specific
     - AGG-MG specific: clears singletons out of 'selected_2'

   Input Parameters:
   . Gmat_2 - global matrix of the (possibly squared) graph (data not defined)
   . Gmat_1 - base (unsquared) graph to grab with
   Input/Output Parameter:
   . aggs_2 - linked lists of aggregates (with gids)
*/
static PetscErrorCode smoothAggs(PC pc,Mat Gmat_2, Mat Gmat_1,PetscCoarsenData *aggs_2)
{
  PetscBool      isMPI;
  Mat_SeqAIJ     *matA_1, *matB_1=NULL;
  MPI_Comm       comm;
  PetscInt       lid,*ii,*idx,ix,Iend,my0,kk,n,j;
  Mat_MPIAIJ     *mpimat_2 = NULL, *mpimat_1=NULL;
  const PetscInt nloc      = Gmat_2->rmap->n;
  PetscScalar    *cpcol_1_state,*cpcol_2_state,*cpcol_2_par_orig,*lid_parent_gid;
  PetscInt       *lid_cprowID_1;
  NState         *lid_state;
  Vec            ghost_par_orig2;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat_2,&comm));
  PetscCall(MatGetOwnershipRange(Gmat_1,&my0,&Iend));

  /* get submatrices */
  PetscCall(PetscStrbeginswith(((PetscObject)Gmat_1)->type_name,MATMPIAIJ,&isMPI));
  if (isMPI) {
    /* grab matrix objects */
    mpimat_2 = (Mat_MPIAIJ*)Gmat_2->data;
    mpimat_1 = (Mat_MPIAIJ*)Gmat_1->data;
    matA_1   = (Mat_SeqAIJ*)mpimat_1->A->data;
    matB_1   = (Mat_SeqAIJ*)mpimat_1->B->data;

    /* force compressed row storage for B matrix in AuxMat */
    PetscCall(MatCheckCompressedRow(mpimat_1->B,matB_1->nonzerorowcnt,&matB_1->compressedrow,matB_1->i,Gmat_1->rmap->n,-1.0));

    /* lid_cprowID_1[lid] = index of lid's row in the compressed (off-diagonal) storage, or -1 */
    PetscCall(PetscMalloc1(nloc, &lid_cprowID_1));
    for (lid = 0; lid < nloc; lid++) lid_cprowID_1[lid] = -1;
    for (ix=0; ix<matB_1->compressedrow.nrows; ix++) {
      PetscInt lid = matB_1->compressedrow.rindex[ix];
      lid_cprowID_1[lid] = ix;
    }
  } else {
    PetscBool isAIJ;
    PetscCall(PetscStrbeginswith(((PetscObject)Gmat_1)->type_name,MATSEQAIJ,&isAIJ));
    PetscCheck(isAIJ,PETSC_COMM_SELF,PETSC_ERR_USER,"Require AIJ matrix.");
    matA_1        = (Mat_SeqAIJ*)Gmat_1->data;
    lid_cprowID_1 = NULL;
  }
  if (nloc>0) {
    PetscCheck(!matB_1 || matB_1->compressedrow.use,PETSC_COMM_SELF,PETSC_ERR_PLIB,"matB_1 && !matB_1->compressedrow.use: PETSc bug???");
  }
  /* get state of locals and selected gid for deleted */
  PetscCall(PetscMalloc2(nloc, &lid_state,nloc, &lid_parent_gid));
  for (lid = 0; lid < nloc; lid++) {
    lid_parent_gid[lid] = -1.0;
    lid_state[lid]      = DELETED;
  }

  /* set lid_state: a vertex that heads an aggregate list is "selected" and
     stores its own gid (non-negative), others stay DELETED */
  for (lid = 0; lid < nloc; lid++) {
    PetscCDIntNd *pos;
    PetscCall(PetscCDGetHeadPos(aggs_2,lid,&pos));
    if (pos) {
      PetscInt gid1;

      PetscCall(PetscCDIntNdGetID(pos, &gid1));
      PetscCheck(gid1 == lid+my0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"gid1 %" PetscInt_FMT " != lid %" PetscInt_FMT " + my0 %" PetscInt_FMT,gid1,lid,my0);
      lid_state[lid] = gid1;
    }
  }

  /* map local to selected local, DELETED means a ghost owns it */
  for (lid=kk=0; lid<nloc; lid++) {
    NState state = lid_state[lid];
    if (IS_SELECTED(state)) {
      PetscCDIntNd *pos;
      PetscCall(PetscCDGetHeadPos(aggs_2,lid,&pos));
      while (pos) {
        PetscInt gid1;
        PetscCall(PetscCDIntNdGetID(pos, &gid1));
        PetscCall(PetscCDGetNextPos(aggs_2,lid,&pos));
        if (gid1 >= my0 && gid1 < Iend) lid_parent_gid[gid1-my0] = (PetscScalar)(lid + my0);
      }
    }
  }
  /* get 'cpcol_1/2_state' & cpcol_2_par_orig - uses mpimat_1/2->lvec for temp space */
  if (isMPI) {
    Vec tempVec;
    /* get 'cpcol_1_state': scatter each local state to the ghost entries of neighbors */
    PetscCall(MatCreateVecs(Gmat_1, &tempVec, NULL));
    for (kk=0,j=my0; kk<nloc; kk++,j++) {
      PetscScalar v = (PetscScalar)lid_state[kk];
      PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
    }
    PetscCall(VecAssemblyBegin(tempVec));
    PetscCall(VecAssemblyEnd(tempVec));
    PetscCall(VecScatterBegin(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(mpimat_1->lvec, &cpcol_1_state));
    /* get 'cpcol_2_state' */
    PetscCall(VecScatterBegin(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(mpimat_2->lvec, &cpcol_2_state));
    /* get 'cpcol_2_par_orig': snapshot of parent gids before any stealing below */
    for (kk=0,j=my0; kk<nloc; kk++,j++) {
      PetscScalar v = (PetscScalar)lid_parent_gid[kk];
      PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
    }
    PetscCall(VecAssemblyBegin(tempVec));
    PetscCall(VecAssemblyEnd(tempVec));
    PetscCall(VecDuplicate(mpimat_2->lvec, &ghost_par_orig2));
    PetscCall(VecScatterBegin(mpimat_2->Mvctx,tempVec, ghost_par_orig2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_2->Mvctx,tempVec, ghost_par_orig2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(ghost_par_orig2, &cpcol_2_par_orig));

    PetscCall(VecDestroy(&tempVec));
  } /* ismpi */
  /* main pass: selected vertices steal their G1 neighbors; deleted vertices
     may be stolen by selected ghost neighbors */
  for (lid=0; lid<nloc; lid++) {
    NState state = lid_state[lid];
    if (IS_SELECTED(state)) {
      /* steal locals */
      ii  = matA_1->i; n = ii[lid+1] - ii[lid];
      idx = matA_1->j + ii[lid];
      for (j=0; j<n; j++) {
        PetscInt lidj   = idx[j], sgid;
        NState   statej = lid_state[lidj];
        if (statej==DELETED && (sgid=(PetscInt)PetscRealPart(lid_parent_gid[lidj])) != lid+my0) { /* steal local */
          lid_parent_gid[lidj] = (PetscScalar)(lid+my0); /* send this if sgid is not local */
          if (sgid >= my0 && sgid < Iend) {       /* I'm stealing this local from a local sgid */
            PetscInt     hav=0,slid=sgid-my0,gidj=lidj+my0;
            PetscCDIntNd *pos,*last=NULL;
            /* looking for local from local so id_llist_2 works */
            PetscCall(PetscCDGetHeadPos(aggs_2,slid,&pos));
            while (pos) {
              PetscInt gid;
              PetscCall(PetscCDIntNdGetID(pos, &gid));
              if (gid == gidj) {
                /* move the node from slid's aggregate list to lid's */
                PetscCheck(last,PETSC_COMM_SELF,PETSC_ERR_PLIB,"last cannot be null");
                PetscCall(PetscCDRemoveNextNode(aggs_2, slid, last));
                PetscCall(PetscCDAppendNode(aggs_2, lid, pos));
                hav  = 1;
                break;
              } else last = pos;
              PetscCall(PetscCDGetNextPos(aggs_2,slid,&pos));
            }
            if (hav != 1) {
              PetscCheck(hav,PETSC_COMM_SELF,PETSC_ERR_PLIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
              SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"found node %" PetscInt_FMT " times???",hav);
            }
          } else {            /* I'm stealing this local, owned by a ghost */
            PetscCheck(sgid == -1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Mat has an un-symmetric graph. Use '-%spc_gamg_sym_graph true' to symmetrize the graph or '-%spc_gamg_threshold -1' if the matrix is structurally symmetric.",((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "",((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "");
            PetscCall(PetscCDAppendID(aggs_2, lid, lidj+my0));
          }
        }
      } /* local neighbors */
    } else if (state == DELETED && lid_cprowID_1) {
      PetscInt sgidold = (PetscInt)PetscRealPart(lid_parent_gid[lid]);
      /* see if I have a selected ghost neighbor that will steal me */
      if ((ix=lid_cprowID_1[lid]) != -1) {
        ii  = matB_1->compressedrow.i; n = ii[ix+1] - ii[ix];
        idx = matB_1->j + ii[ix];
        for (j=0; j<n; j++) {
          PetscInt cpid   = idx[j];
          NState   statej = (NState)PetscRealPart(cpcol_1_state[cpid]);
          if (IS_SELECTED(statej) && sgidold != (PetscInt)statej) { /* ghost will steal this, remove from my list */
            lid_parent_gid[lid] = (PetscScalar)statej; /* send who selected */
            if (sgidold>=my0 && sgidold<Iend) { /* this was mine */
              PetscInt     hav=0,oldslidj=sgidold-my0;
              PetscCDIntNd *pos,*last=NULL;
              /* remove from 'oldslidj' list */
              PetscCall(PetscCDGetHeadPos(aggs_2,oldslidj,&pos));
              while (pos) {
                PetscInt gid;
                PetscCall(PetscCDIntNdGetID(pos, &gid));
                if (lid+my0 == gid) {
                  /* id_llist_2[lastid] = id_llist_2[flid];   /\* remove lid from oldslidj list *\/ */
                  PetscCheck(last,PETSC_COMM_SELF,PETSC_ERR_PLIB,"last cannot be null");
                  PetscCall(PetscCDRemoveNextNode(aggs_2, oldslidj, last));
                  /* ghost (PetscScalar)statej will add this later */
                  hav = 1;
                  break;
                } else last = pos;
                PetscCall(PetscCDGetNextPos(aggs_2,oldslidj,&pos));
              }
              if (hav != 1) {
                PetscCheck(hav,PETSC_COMM_SELF,PETSC_ERR_PLIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
                SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"found node %" PetscInt_FMT " times???",hav);
              }
            } else {
              /* TODO: ghosts remove this later */
            }
          }
        }
      }
    } /* selected/deleted */
  } /* node loop */

  /* reconcile ownership changes with the ghost owners */
  if (isMPI) {
    PetscScalar     *cpcol_2_parent,*cpcol_2_gid;
    Vec             tempVec,ghostgids2,ghostparents2;
    PetscInt        cpid,nghost_2;
    PCGAMGHashTable gid_cpid;

    PetscCall(VecGetSize(mpimat_2->lvec, &nghost_2));
    PetscCall(MatCreateVecs(Gmat_2, &tempVec, NULL));

    /* get 'cpcol_2_parent': parent gids after the stealing pass above */
    for (kk=0,j=my0; kk<nloc; kk++,j++) {
      PetscCall(VecSetValues(tempVec, 1, &j, &lid_parent_gid[kk], INSERT_VALUES));
    }
    PetscCall(VecAssemblyBegin(tempVec));
    PetscCall(VecAssemblyEnd(tempVec));
    PetscCall(VecDuplicate(mpimat_2->lvec, &ghostparents2));
    PetscCall(VecScatterBegin(mpimat_2->Mvctx,tempVec, ghostparents2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_2->Mvctx,tempVec, ghostparents2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(ghostparents2, &cpcol_2_parent));

    /* get 'cpcol_2_gid': global id of each ghost */
    for (kk=0,j=my0; kk<nloc; kk++,j++) {
      PetscScalar v = (PetscScalar)j;
      PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
    }
    PetscCall(VecAssemblyBegin(tempVec));
    PetscCall(VecAssemblyEnd(tempVec));
    PetscCall(VecDuplicate(mpimat_2->lvec, &ghostgids2));
    PetscCall(VecScatterBegin(mpimat_2->Mvctx,tempVec, ghostgids2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mpimat_2->Mvctx,tempVec, ghostgids2,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArray(ghostgids2, &cpcol_2_gid));
    PetscCall(VecDestroy(&tempVec));

    /* look for deleted ghosts and add to table */
    PetscCall(PCGAMGHashTableCreate(2*nghost_2+1, &gid_cpid));
    for (cpid = 0; cpid < nghost_2; cpid++) {
      NState state = (NState)PetscRealPart(cpcol_2_state[cpid]);
      if (state==DELETED) {
        PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
        PetscInt sgid_old = (PetscInt)PetscRealPart(cpcol_2_par_orig[cpid]);
        if (sgid_old == -1 && sgid_new != -1) {
          PetscInt gid = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
          PetscCall(PCGAMGHashTableAdd(&gid_cpid, gid, cpid));
        }
      }
    }

    /* look for deleted ghosts and see if they moved - remove it */
    for (lid=0; lid<nloc; lid++) {
      NState state = lid_state[lid];
      if (IS_SELECTED(state)) {
        PetscCDIntNd *pos,*last=NULL;
        /* look for deleted ghosts and see if they moved */
        PetscCall(PetscCDGetHeadPos(aggs_2,lid,&pos));
        while (pos) {
          PetscInt gid;
          PetscCall(PetscCDIntNdGetID(pos, &gid));

          if (gid < my0 || gid >= Iend) {
            PetscCall(PCGAMGHashTableFind(&gid_cpid, gid, &cpid));
            if (cpid != -1) {
              /* a moved ghost - */
              /* id_llist_2[lastid] = id_llist_2[flid];    /\* remove 'flid' from list *\/ */
              PetscCall(PetscCDRemoveNextNode(aggs_2, lid, last));
            } else last = pos;
          } else last = pos;

          PetscCall(PetscCDGetNextPos(aggs_2,lid,&pos));
        } /* loop over list of deleted */
      } /* selected */
    }
    PetscCall(PCGAMGHashTableDestroy(&gid_cpid));

    /* look at ghosts, see if they changed - and it */
    for (cpid = 0; cpid < nghost_2; cpid++) {
      PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
      if (sgid_new >= my0 && sgid_new < Iend) { /* this is mine */
        PetscInt     gid     = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
        PetscInt     slid_new=sgid_new-my0,hav=0;
        PetscCDIntNd *pos;

        /* search for this gid to see if I have it */
        PetscCall(PetscCDGetHeadPos(aggs_2,slid_new,&pos));
        while (pos) {
          PetscInt gidj;
          PetscCall(PetscCDIntNdGetID(pos, &gidj));
          PetscCall(PetscCDGetNextPos(aggs_2,slid_new,&pos));

          if (gidj == gid) { hav = 1; break; }
        }
        if (hav != 1) {
          /* insert 'flidj' into head of llist */
          PetscCall(PetscCDAppendID(aggs_2, slid_new, gid));
        }
      }
    }

    PetscCall(VecRestoreArray(mpimat_1->lvec, &cpcol_1_state));
    PetscCall(VecRestoreArray(mpimat_2->lvec, &cpcol_2_state));
    PetscCall(VecRestoreArray(ghostparents2, &cpcol_2_parent));
    PetscCall(VecRestoreArray(ghostgids2, &cpcol_2_gid));
    PetscCall(PetscFree(lid_cprowID_1));
    PetscCall(VecDestroy(&ghostgids2));
    PetscCall(VecDestroy(&ghostparents2));
    PetscCall(VecDestroy(&ghost_par_orig2));
  }

  PetscCall(PetscFree2(lid_state,lid_parent_gid));
  PetscFunctionReturn(0);
}
551 
/* -------------------------------------------------------------------------- */
/*
   PCSetData_AGG - called if data is not set with PCSetCoordinates.
      Looks in Mat for near null space.
      Does not work for Stokes

  Input Parameters:
   . pc - the preconditioner context
   . a_A - matrix to get (near) null space out of.
*/
static PetscErrorCode PCSetData_AGG(PC pc, Mat a_A)
{
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
  MatNullSpace   mnull;

  PetscFunctionBegin;
  PetscCall(MatGetNearNullSpace(a_A, &mnull));
  if (!mnull) {
    /* no near null space on the matrix: try to find one attached to the DM's first field */
    DM dm;
    PetscCall(PCGetDM(pc, &dm));
    if (!dm) {
      PetscCall(MatGetDM(a_A, &dm));
    }
    if (dm) {
      PetscObject deformation;
      PetscInt    Nf;

      PetscCall(DMGetNumFields(dm, &Nf));
      if (Nf) {
        PetscCall(DMGetField(dm, 0, NULL, &deformation));
        PetscCall(PetscObjectQuery((PetscObject)deformation,"nearnullspace",(PetscObject*)&mnull));
        if (!mnull) {
          PetscCall(PetscObjectQuery((PetscObject)deformation,"nullspace",(PetscObject*)&mnull));
        }
      }
    }
  }

  if (!mnull) {
    /* no null space anywhere: fall back to constant null space vectors (coords == NULL) */
    PetscInt bs,NN,MM;
    PetscCall(MatGetBlockSize(a_A, &bs));
    PetscCall(MatGetLocalSize(a_A, &MM, &NN));
    PetscCheck(MM % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MM %" PetscInt_FMT " must be divisible by bs %" PetscInt_FMT,MM,bs);
    PetscCall(PCSetCoordinates_AGG(pc, bs, MM/bs, NULL));
  } else {
    /* copy the null space vectors (plus an optional constant vector) into
       pc_gamg->data, column oriented: column i occupies nullvec[i*mlocal .. ] */
    PetscReal         *nullvec;
    PetscBool         has_const;
    PetscInt          i,j,mlocal,nvec,bs;
    const Vec         *vecs;
    const PetscScalar *v;

    PetscCall(MatGetLocalSize(a_A,&mlocal,NULL));
    PetscCall(MatNullSpaceGetVecs(mnull, &has_const, &nvec, &vecs));
    pc_gamg->data_sz = (nvec+!!has_const)*mlocal;
    PetscCall(PetscMalloc1((nvec+!!has_const)*mlocal,&nullvec));
    if (has_const) for (i=0; i<mlocal; i++) nullvec[i] = 1.0;
    for (i=0; i<nvec; i++) {
      PetscCall(VecGetArrayRead(vecs[i],&v));
      for (j=0; j<mlocal; j++) nullvec[(i+!!has_const)*mlocal + j] = PetscRealPart(v[j]);
      PetscCall(VecRestoreArrayRead(vecs[i],&v));
    }
    pc_gamg->data           = nullvec;
    pc_gamg->data_cell_cols = (nvec+!!has_const);
    PetscCall(MatGetBlockSize(a_A, &bs));
    pc_gamg->data_cell_rows = bs;
  }
  PetscFunctionReturn(0);
}
621 
/* -------------------------------------------------------------------------- */
/*
 formProl0

   Input Parameters:
   . agg_llists - list of arrays with aggregates -- list from selected vertices of aggregate unselected vertices
   . bs - row block size
   . nSAvec - column bs of new P
   . my0crs - global index of start of locals
   . data_stride - bs*(nloc nodes + ghost nodes) [data_stride][nSAvec]
   . data_in[data_stride*nSAvec] - local data on fine grid
   . flid_fgid[data_stride/bs] - map of local to global IDs, includes ghosts in 'locals_llist'
  Output Parameters:
   . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data
   . a_Prol - prolongation operator
*/
static PetscErrorCode formProl0(PetscCoarsenData *agg_llists,PetscInt bs,PetscInt nSAvec,PetscInt my0crs,PetscInt data_stride,PetscReal data_in[],const PetscInt flid_fgid[],PetscReal **a_data_out,Mat a_Prol)
{
  PetscInt        Istart,my0,Iend,nloc,clid,flid = 0,aggID,kk,jj,ii,mm,nSelected,minsz,nghosts,out_data_stride;
  MPI_Comm        comm;
  PetscReal       *out_data;
  PetscCDIntNd    *pos;
  PCGAMGHashTable fgid_flid;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)a_Prol,&comm));
  PetscCall(MatGetOwnershipRange(a_Prol, &Istart, &Iend));
  nloc = (Iend-Istart)/bs; my0 = Istart/bs;
  PetscCheck((Iend-Istart) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Iend %" PetscInt_FMT " - Istart %" PetscInt_FMT " must be divisible by bs %" PetscInt_FMT,Iend,Istart,bs);
  Iend   /= bs;
  nghosts = data_stride/bs - nloc;

  /* hash table: ghost global id -> local (ghost) index, for aggregate members not owned here */
  PetscCall(PCGAMGHashTableCreate(2*nghosts+1, &fgid_flid));
  for (kk=0; kk<nghosts; kk++) {
    PetscCall(PCGAMGHashTableAdd(&fgid_flid, flid_fgid[nloc+kk], nloc+kk));
  }

  /* count selected -- same as number of cols of P */
  for (nSelected=mm=0; mm<nloc; mm++) {
    PetscBool ise;
    PetscCall(PetscCDEmptyAt(agg_llists, mm, &ise));
    if (!ise) nSelected++;
  }
  PetscCall(MatGetOwnershipRangeColumn(a_Prol, &ii, &jj));
  PetscCheck((ii/nSAvec) == my0crs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %" PetscInt_FMT " /nSAvec %" PetscInt_FMT "  != my0crs %" PetscInt_FMT,ii,nSAvec,my0crs);
  PetscCheck(nSelected == (jj-ii)/nSAvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nSelected %" PetscInt_FMT " != (jj %" PetscInt_FMT " - ii %" PetscInt_FMT ")/nSAvec %" PetscInt_FMT,nSelected,jj,ii,nSAvec);

  /* allocate space for coarse point data (output); initialized to a sentinel so the
     consistency check below can detect double writes */
  out_data_stride = nSelected*nSAvec;

  PetscCall(PetscMalloc1(out_data_stride*nSAvec, &out_data));
  for (ii=0;ii<out_data_stride*nSAvec;ii++) out_data[ii]=PETSC_MAX_REAL;
  *a_data_out = out_data; /* output - stride nSelected*nSAvec */

  /* find points and set prolongation */
  minsz = 100;
  for (mm = clid = 0; mm < nloc; mm++) {
    PetscCall(PetscCDSizeAt(agg_llists, mm, &jj));
    if (jj > 0) {
      const PetscInt lid = mm, cgid = my0crs + clid;
      PetscInt       cids[100]; /* max bs -- NOTE(review): assumes nSAvec <= 100; no runtime check here */
      PetscBLASInt   asz  =jj,M=asz*bs,N=nSAvec,INFO;
      PetscBLASInt   Mdata=M+((N-M>0) ? N-M : 0),LDA=Mdata,LWORK=N*bs;
      PetscScalar    *qqc,*qqr,*TAU,*WORK;
      PetscInt       *fids;
      PetscReal      *data;

      /* count agg */
      if (asz<minsz) minsz = asz;

      /* get block */
      PetscCall(PetscMalloc5(Mdata*N, &qqc,M*N, &qqr,N, &TAU,LWORK, &WORK,M, &fids));

      aggID = 0;
      PetscCall(PetscCDGetHeadPos(agg_llists,lid,&pos));
      while (pos) {
        PetscInt gid1;
        PetscCall(PetscCDIntNdGetID(pos, &gid1));
        PetscCall(PetscCDGetNextPos(agg_llists,lid,&pos));

        if (gid1 >= my0 && gid1 < Iend) flid = gid1 - my0;
        else {
          PetscCall(PCGAMGHashTableFind(&fgid_flid, gid1, &flid));
          PetscCheck(flid >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot find gid1 in table");
        }
        /* copy in B_i matrix - column oriented */
        data = &data_in[flid*bs];
        for (ii = 0; ii < bs; ii++) {
          for (jj = 0; jj < N; jj++) {
            PetscReal d = data[jj*data_stride + ii];
            qqc[jj*Mdata + aggID*bs + ii] = d;
          }
        }
        /* set fine IDs */
        for (kk=0; kk<bs; kk++) fids[aggID*bs + kk] = flid_fgid[flid]*bs + kk;
        aggID++;
      }

      /* pad with zeros */
      /* NOTE(review): the kk++ in the inner loop looks like a leftover; kk is not
         used inside this loop -- confirm before removing */
      for (ii = asz*bs; ii < Mdata; ii++) {
        for (jj = 0; jj < N; jj++, kk++) {
          qqc[jj*Mdata + ii] = .0;
        }
      }

      /* QR factorization of the aggregate's near-null-space block */
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
      PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO));
      PetscCall(PetscFPTrapPop());
      PetscCheck(INFO == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"xGEQRF error");
      /* get R - column oriented - output B_{i+1} */
      {
        PetscReal *data = &out_data[clid*nSAvec];
        for (jj = 0; jj < nSAvec; jj++) {
          for (ii = 0; ii < nSAvec; ii++) {
            PetscCheck(data[jj*out_data_stride + ii] == PETSC_MAX_REAL,PETSC_COMM_SELF,PETSC_ERR_PLIB,"data[jj*out_data_stride + ii] != %e",(double)PETSC_MAX_REAL);
           if (ii <= jj) data[jj*out_data_stride + ii] = PetscRealPart(qqc[jj*Mdata + ii]);
           else data[jj*out_data_stride + ii] = 0.;
          }
        }
      }

      /* get Q - row oriented (explicit orthonormal factor from the QR reflectors) */
      PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO));
      PetscCheck(INFO == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"xORGQR error arg %" PetscBLASInt_FMT,-INFO);

      /* transpose Q from column-major qqc into row-major qqr for MatSetValues */
      for (ii = 0; ii < M; ii++) {
        for (jj = 0; jj < N; jj++) {
          qqr[N*ii + jj] = qqc[jj*Mdata + ii];
        }
      }

      /* add diagonal block of P0 */
      for (kk=0; kk<N; kk++) {
        cids[kk] = N*cgid + kk; /* global col IDs in P0 */
      }
      PetscCall(MatSetValues(a_Prol,M,fids,N,cids,qqr,INSERT_VALUES));
      PetscCall(PetscFree5(qqc,qqr,TAU,WORK,fids));
      clid++;
    } /* coarse agg */
  } /* for all fine nodes */
  PetscCall(MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY));
  PetscCall(PCGAMGHashTableDestroy(&fgid_flid));
  PetscFunctionReturn(0);
}
768 
769 static PetscErrorCode PCView_GAMG_AGG(PC pc,PetscViewer viewer)
770 {
771   PC_MG          *mg      = (PC_MG*)pc->data;
772   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
773   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
774 
775   PetscFunctionBegin;
776   PetscCall(PetscViewerASCIIPrintf(viewer,"      AGG specific options\n"));
777   PetscCall(PetscViewerASCIIPrintf(viewer,"        Symmetric graph %s\n",pc_gamg_agg->sym_graph ? "true" : "false"));
778   PetscCall(PetscViewerASCIIPrintf(viewer,"        Number of levels to square graph %" PetscInt_FMT "\n",pc_gamg_agg->square_graph));
779   PetscCall(PetscViewerASCIIPrintf(viewer,"        Number smoothing steps %" PetscInt_FMT "\n",pc_gamg_agg->nsmooths));
780   PetscFunctionReturn(0);
781 }
782 
783 /* -------------------------------------------------------------------------- */
784 /*
785    PCGAMGGraph_AGG
786 
787   Input Parameter:
788    . pc - this
789    . Amat - matrix on this fine level
790   Output Parameter:
   . a_Gmat - the (possibly symmetrized and filtered) graph used to drive aggregation
792 */
793 static PetscErrorCode PCGAMGGraph_AGG(PC pc,Mat Amat,Mat *a_Gmat)
794 {
795   PC_MG                     *mg          = (PC_MG*)pc->data;
796   PC_GAMG                   *pc_gamg     = (PC_GAMG*)mg->innerctx;
797   const PetscReal           vfilter      = pc_gamg->threshold[pc_gamg->current_level];
798   PC_GAMG_AGG               *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
799   Mat                       Gmat;
800   MPI_Comm                  comm;
801   PetscBool /* set,flg , */ symm;
802 
803   PetscFunctionBegin;
804   PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
805 
806   /* PetscCall(MatIsSymmetricKnown(Amat, &set, &flg)); || !(set && flg) -- this causes lot of symm calls */
807   symm = (PetscBool)(pc_gamg_agg->sym_graph); /* && !pc_gamg_agg->square_graph; */
808 
809   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_GRAPH],0,0,0,0));
810   PetscCall(PCGAMGCreateGraph(Amat, &Gmat, symm));
811   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_GRAPH],0,0,0,0));
812 
813   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_FILTER],0,0,0,0));
814   PetscCall(PCGAMGFilterGraph(&Gmat, vfilter));
815   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_FILTER],0,0,0,0));
816 
817   *a_Gmat = Gmat;
818 
819   PetscFunctionReturn(0);
820 }
821 
822 /* -------------------------------------------------------------------------- */
823 /*
824    PCGAMGCoarsen_AGG
825 
826   Input Parameter:
827    . a_pc - this
828   Input/Output Parameter:
829    . a_Gmat1 - graph on this fine level - coarsening can change this (squares it)
830   Output Parameter:
831    . agg_lists - list of aggregates
832 */
833 static PetscErrorCode PCGAMGCoarsen_AGG(PC a_pc,Mat *a_Gmat1,PetscCoarsenData **agg_lists)
834 {
835   PC_MG          *mg          = (PC_MG*)a_pc->data;
836   PC_GAMG        *pc_gamg     = (PC_GAMG*)mg->innerctx;
837   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
838   Mat            mat,Gmat2, Gmat1 = *a_Gmat1;  /* squared graph */
839   IS             perm;
840   PetscInt       Istart,Iend,Ii,nloc,bs,n,m;
841   PetscInt       *permute;
842   PetscBool      *bIndexSet;
843   MatCoarsen     crs;
844   MPI_Comm       comm;
845   PetscReal      hashfact;
846   PetscInt       iSwapIndex;
847   PetscRandom    random;
848 
849   PetscFunctionBegin;
850   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_COARSEN],0,0,0,0));
851   PetscCall(PetscObjectGetComm((PetscObject)Gmat1,&comm));
852   PetscCall(MatGetLocalSize(Gmat1, &n, &m));
853   PetscCall(MatGetBlockSize(Gmat1, &bs));
854   PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"bs %" PetscInt_FMT " must be 1",bs);
855   nloc = n/bs;
856 
857   if (pc_gamg->current_level < pc_gamg_agg->square_graph) {
858     PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_SQUARE],0,0,0,0));
859     PetscCall(PCGAMGSquareGraph_GAMG(a_pc,Gmat1,&Gmat2));
860     PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_SQUARE],0,0,0,0));
861   } else Gmat2 = Gmat1;
862 
863   /* get MIS aggs - randomize */
864   PetscCall(PetscMalloc1(nloc, &permute));
865   PetscCall(PetscCalloc1(nloc, &bIndexSet));
866   for (Ii = 0; Ii < nloc; Ii++) permute[Ii] = Ii;
867   PetscCall(PetscRandomCreate(PETSC_COMM_SELF,&random));
868   PetscCall(MatGetOwnershipRange(Gmat1, &Istart, &Iend));
869   for (Ii = 0; Ii < nloc; Ii++) {
870     PetscCall(PetscRandomGetValueReal(random,&hashfact));
871     iSwapIndex = (PetscInt) (hashfact*nloc)%nloc;
872     if (!bIndexSet[iSwapIndex] && iSwapIndex != Ii) {
873       PetscInt iTemp = permute[iSwapIndex];
874       permute[iSwapIndex]   = permute[Ii];
875       permute[Ii]           = iTemp;
876       bIndexSet[iSwapIndex] = PETSC_TRUE;
877     }
878   }
879   PetscCall(PetscFree(bIndexSet));
880   PetscCall(PetscRandomDestroy(&random));
881   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nloc, permute, PETSC_USE_POINTER, &perm));
882   PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_MIS],0,0,0,0));
883   PetscCall(MatCoarsenCreate(comm, &crs));
884   PetscCall(MatCoarsenSetFromOptions(crs));
885   PetscCall(MatCoarsenSetGreedyOrdering(crs, perm));
886   PetscCall(MatCoarsenSetAdjacency(crs, Gmat2));
887   PetscCall(MatCoarsenSetStrictAggs(crs, PETSC_TRUE));
888   PetscCall(MatCoarsenApply(crs));
889   PetscCall(MatCoarsenGetData(crs, agg_lists)); /* output */
890   PetscCall(MatCoarsenDestroy(&crs));
891 
892   PetscCall(ISDestroy(&perm));
893   PetscCall(PetscFree(permute));
894   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_MIS],0,0,0,0));
895 
896   /* smooth aggs */
897   if (Gmat2 != Gmat1) {
898     const PetscCoarsenData *llist = *agg_lists;
899     PetscCall(smoothAggs(a_pc,Gmat2, Gmat1, *agg_lists));
900     PetscCall(MatDestroy(&Gmat1));
901     *a_Gmat1 = Gmat2; /* output */
902     PetscCall(PetscCDGetMat(llist, &mat));
903     PetscCheck(!mat,comm,PETSC_ERR_ARG_WRONG, "Auxilary matrix with squared graph????");
904   } else {
905     const PetscCoarsenData *llist = *agg_lists;
906     /* see if we have a matrix that takes precedence (returned from MatCoarsenApply) */
907     PetscCall(PetscCDGetMat(llist, &mat));
908     if (mat) {
909       PetscCall(MatDestroy(&Gmat1));
910       *a_Gmat1 = mat; /* output */
911     }
912   }
913   PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_COARSEN],0,0,0,0));
914   PetscFunctionReturn(0);
915 }
916 
917 /* -------------------------------------------------------------------------- */
918 /*
919  PCGAMGProlongator_AGG
920 
921  Input Parameter:
922  . pc - this
923  . Amat - matrix on this fine level
 . Graph - used to get ghost data for nodes in the aggregates
925  . agg_lists - list of aggregates
926  Output Parameter:
927  . a_P_out - prolongation operator to the next level
928  */
static PetscErrorCode PCGAMGProlongator_AGG(PC pc,Mat Amat,Mat Gmat,PetscCoarsenData *agg_lists,Mat *a_P_out)
{
  PC_MG          *mg       = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg  = (PC_GAMG*)mg->innerctx;
  const PetscInt col_bs = pc_gamg->data_cell_cols; /* column block size of P (presumably the number of near-null-space vectors) */
  PetscInt       Istart,Iend,nloc,ii,jj,kk,my0,nLocalSelected,bs;
  Mat            Prol;
  PetscMPIInt    size;
  MPI_Comm       comm;
  PetscReal      *data_w_ghost;  /* coordinate/null-space data with ghost rows (only allocated when size > 1) */
  PetscInt       myCrs0, nbnodes=0, *flid_fgid;
  MatType        mtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCheck(col_bs >= 1,comm,PETSC_ERR_PLIB,"Column bs cannot be less than 1");
  PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_PROL],0,0,0,0));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetBlockSize(Amat, &bs));
  /* local number of block nodes and global index of the first local block node */
  nloc = (Iend-Istart)/bs; my0 = Istart/bs;
  PetscCheck((Iend-Istart) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"(Iend %" PetscInt_FMT " - Istart %" PetscInt_FMT ") not divisible by bs %" PetscInt_FMT,Iend,Istart,bs);

  /* get 'nLocalSelected': count non-empty aggregates, i.e. local coarse-grid nodes */
  for (ii=0, nLocalSelected = 0; ii < nloc; ii++) {
    PetscBool ise;
    /* filter out singletons 0 or 1? */
    PetscCall(PetscCDEmptyAt(agg_lists, ii, &ise));
    if (!ise) nLocalSelected++;
  }

  /* create prolongator, create P matrix */
  PetscCall(MatGetType(Amat,&mtype));
  PetscCall(MatCreate(comm, &Prol));
  PetscCall(MatSetSizes(Prol,nloc*bs,nLocalSelected*col_bs,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetBlockSizes(Prol, bs, col_bs));
  PetscCall(MatSetType(Prol, mtype));
  /* each fine row has at most col_bs entries (one coarse block); same bound for off-diagonal */
  PetscCall(MatSeqAIJSetPreallocation(Prol,col_bs, NULL));
  PetscCall(MatMPIAIJSetPreallocation(Prol,col_bs, NULL, col_bs, NULL));

  /* can get all points "removed": an empty coarse grid means no prolongator (NULL output) */
  PetscCall(MatGetSize(Prol, &kk, &ii));
  if (!ii) {
    PetscCall(PetscInfo(pc,"%s: No selected points on coarse grid\n",((PetscObject)pc)->prefix));
    PetscCall(MatDestroy(&Prol));
    *a_P_out = NULL;  /* out */
    PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROL],0,0,0,0));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscInfo(pc,"%s: New grid %" PetscInt_FMT " nodes\n",((PetscObject)pc)->prefix,ii/col_bs));
  PetscCall(MatGetOwnershipRangeColumn(Prol, &myCrs0, &kk));

  /* myCrs0 becomes the global index of the first local coarse block node */
  PetscCheck((kk-myCrs0) % col_bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"(kk %" PetscInt_FMT " -myCrs0 %" PetscInt_FMT ") not divisible by col_bs %" PetscInt_FMT,kk,myCrs0,col_bs);
  myCrs0 = myCrs0/col_bs;
  PetscCheck((kk/col_bs-myCrs0) == nLocalSelected,PETSC_COMM_SELF,PETSC_ERR_PLIB,"(kk %" PetscInt_FMT "/col_bs %" PetscInt_FMT " - myCrs0 %" PetscInt_FMT ") != nLocalSelected %" PetscInt_FMT ")",kk,col_bs,myCrs0,nLocalSelected);

  /* create global vector of data in 'data_w_ghost': gather pc_gamg->data (one scalar column
     at a time) with ghost values so aggregates can reach off-process members */
  PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_PROLA],0,0,0,0));
  if (size > 1) { /*  */
    PetscReal *tmp_gdata,*tmp_ldata,*tp2;
    PetscCall(PetscMalloc1(nloc, &tmp_ldata));
    for (jj = 0; jj < col_bs; jj++) {
      for (kk = 0; kk < bs; kk++) {
        PetscInt        ii,stride;
        /* strided view into pc_gamg->data for column jj, dof kk */
        const PetscReal *tp = pc_gamg->data + jj*bs*nloc + kk;
        for (ii = 0; ii < nloc; ii++, tp += bs) tmp_ldata[ii] = *tp;

        PetscCall(PCGAMGGetDataWithGhosts(Gmat, 1, tmp_ldata, &stride, &tmp_gdata));

        if (!jj && !kk) { /* now I know how many total nodes - allocate */
          PetscCall(PetscMalloc1(stride*bs*col_bs, &data_w_ghost));
          nbnodes = bs*stride;
        }
        /* scatter the ghosted column back into the strided layout of data_w_ghost */
        tp2 = data_w_ghost + jj*bs*stride + kk;
        for (ii = 0; ii < stride; ii++, tp2 += bs) *tp2 = tmp_gdata[ii];
        PetscCall(PetscFree(tmp_gdata));
      }
    }
    PetscCall(PetscFree(tmp_ldata));
  } else {
    /* serial: no ghosts, use pc_gamg->data in place (do not free data_w_ghost in this case) */
    nbnodes      = bs*nloc;
    data_w_ghost = (PetscReal*)pc_gamg->data;
  }

  /* get P0: build the fine-local-id -> fine-global-id map (including ghosts) */
  if (size > 1) {
    PetscReal *fid_glid_loc,*fiddata;
    PetscInt  stride;

    /* ship global ids as reals through the same ghost-exchange used for the data */
    PetscCall(PetscMalloc1(nloc, &fid_glid_loc));
    for (kk=0; kk<nloc; kk++) fid_glid_loc[kk] = (PetscReal)(my0+kk);
    PetscCall(PCGAMGGetDataWithGhosts(Gmat, 1, fid_glid_loc, &stride, &fiddata));
    PetscCall(PetscMalloc1(stride, &flid_fgid));
    for (kk=0; kk<stride; kk++) flid_fgid[kk] = (PetscInt)fiddata[kk];
    PetscCall(PetscFree(fiddata));

    PetscCheck(stride == nbnodes/bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"stride %" PetscInt_FMT " != nbnodes %" PetscInt_FMT "/bs %" PetscInt_FMT,stride,nbnodes,bs);
    PetscCall(PetscFree(fid_glid_loc));
  } else {
    /* serial: local id i is global id my0+i */
    PetscCall(PetscMalloc1(nloc, &flid_fgid));
    for (kk=0; kk<nloc; kk++) flid_fgid[kk] = my0 + kk;
  }
  PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROLA],0,0,0,0));
  PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_PROLB],0,0,0,0));
  {
    /* fill P0 and get the coarse-level data (R factors) back; formProl0 allocates data_out */
    PetscReal *data_out = NULL;
    PetscCall(formProl0(agg_lists, bs, col_bs, myCrs0, nbnodes,data_w_ghost, flid_fgid, &data_out, Prol));
    PetscCall(PetscFree(pc_gamg->data));

    /* hand the coarse data to the next level */
    pc_gamg->data           = data_out;
    pc_gamg->data_cell_rows = col_bs;
    pc_gamg->data_sz        = col_bs*col_bs*nLocalSelected;
  }
  PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROLB],0,0,0,0));
  if (size > 1) {PetscCall(PetscFree(data_w_ghost));}
  PetscCall(PetscFree(flid_fgid));

  *a_P_out = Prol;  /* out */

  PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROL],0,0,0,0));
  PetscFunctionReturn(0);
}
1051 
1052 /* -------------------------------------------------------------------------- */
1053 /*
1054    PCGAMGOptProlongator_AGG
1055 
1056   Input Parameter:
1057    . pc - this
1058    . Amat - matrix on this fine level
1059  In/Output Parameter:
1060    . a_P - prolongation operator to the next level
1061 */
static PetscErrorCode PCGAMGOptProlongator_AGG(PC pc,Mat Amat,Mat *a_P)
{
  PC_MG          *mg          = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg     = (PC_GAMG*)mg->innerctx;
  PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
  PetscInt       jj;
  Mat            Prol  = *a_P;
  MPI_Comm       comm;
  KSP            eksp;           /* throw-away KSP used only for the eigen estimate */
  Vec            bb, xx;
  PC             epc;
  PetscReal      alpha, emax, emin;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_OPT],0,0,0,0));

  /* compute maximum singular value of operator to be used in smoother */
  if (0 < pc_gamg_agg->nsmooths) {
    /* get eigen estimates */
    if (pc_gamg->emax > 0) {
      /* user supplied the extreme eigenvalues; skip the estimation solve */
      emin = pc_gamg->emin;
      emax = pc_gamg->emax;
    } else {
      const char *prefix;

      /* estimate extreme singular values of D^{-1}A with a few Krylov iterations
         on a noisy right-hand side */
      PetscCall(MatCreateVecs(Amat, &bb, NULL));
      PetscCall(MatCreateVecs(Amat, &xx, NULL));
      PetscCall(KSPSetNoisy_Private(bb));

      PetscCall(KSPCreate(comm,&eksp));
      PetscCall(PCGetOptionsPrefix(pc,&prefix));
      PetscCall(KSPSetOptionsPrefix(eksp,prefix));
      PetscCall(KSPAppendOptionsPrefix(eksp,"pc_gamg_esteig_"));
      {
        /* use CG when the matrix is marked SPD (cheaper/more accurate than default) */
        PetscBool sflg;
        PetscCall(MatGetOption(Amat, MAT_SPD, &sflg));
        if (sflg) {
          PetscCall(KSPSetType(eksp, KSPCG));
        }
      }
      PetscCall(KSPSetErrorIfNotConverged(eksp,pc->erroriffailure));
      /* no norms needed: we only want the Lanczos/Arnoldi singular-value estimates */
      PetscCall(KSPSetNormType(eksp, KSP_NORM_NONE));

      PetscCall(KSPSetInitialGuessNonzero(eksp, PETSC_FALSE));
      PetscCall(KSPSetOperators(eksp, Amat, Amat));

      PetscCall(KSPGetPC(eksp, &epc));
      PetscCall(PCSetType(epc, PCJACOBI));  /* smoother in smoothed agg. */

      PetscCall(KSPSetTolerances(eksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT, 10)); // 10 is safer, but 5 is often fine, can override with -pc_gamg_esteig_ksp_max_it -mg_levels_ksp_chebyshev_esteig 0,0.25,0,1.2

      PetscCall(KSPSetFromOptions(eksp));
      PetscCall(KSPSetComputeSingularValues(eksp,PETSC_TRUE));
      PetscCall(KSPSolve(eksp, bb, xx));
      PetscCall(KSPCheckSolve(eksp,pc,xx));

      PetscCall(KSPComputeExtremeSingularValues(eksp, &emax, &emin));
      PetscCall(PetscInfo(pc,"%s: Smooth P0: max eigen=%e min=%e PC=%s\n",((PetscObject)pc)->prefix,(double)emax,(double)emin,PCJACOBI));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecDestroy(&bb));
      PetscCall(KSPDestroy(&eksp));
    }
    if (pc_gamg->use_sa_esteig) {
      /* cache the spectrum so the level smoother (e.g. Chebyshev) can reuse it */
      mg->min_eigen_DinvA[pc_gamg->current_level] = emin;
      mg->max_eigen_DinvA[pc_gamg->current_level] = emax;
      PetscCall(PetscInfo(pc,"%s: Smooth P0: level %" PetscInt_FMT ", cache spectra %g %g\n",((PetscObject)pc)->prefix,pc_gamg->current_level,(double)emin,(double)emax));
    } else {
      /* 0 means "not cached" for the level smoother */
      mg->min_eigen_DinvA[pc_gamg->current_level] = 0;
      mg->max_eigen_DinvA[pc_gamg->current_level] = 0;
    }
  } else {
    /* no prolongator smoothing requested: nothing to cache */
    mg->min_eigen_DinvA[pc_gamg->current_level] = 0;
    mg->max_eigen_DinvA[pc_gamg->current_level] = 0;
  }

  /* smooth P0 */
  for (jj = 0; jj < pc_gamg_agg->nsmooths; jj++) {
    Mat tMat;
    Vec diag;

    PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_OPTSM],0,0,0,0));

    /* smooth P1 := (I - omega/lam D^{-1}A)P0 */
    PetscCall(PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][2],0,0,0,0));
    PetscCall(MatMatMult(Amat, Prol, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tMat));
    PetscCall(PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][2],0,0,0,0));
    PetscCall(MatProductClear(tMat));
    PetscCall(MatCreateVecs(Amat, &diag, NULL));
    PetscCall(MatGetDiagonal(Amat, diag)); /* effectively PCJACOBI */
    PetscCall(VecReciprocal(diag));
    PetscCall(MatDiagonalScale(tMat, diag, NULL));
    PetscCall(VecDestroy(&diag));

    /* TODO: Set a PCFailedReason and exit the building of the AMG preconditioner */
    PetscCheck(emax != 0.0,PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Computed maximum singular value as zero");
    /* TODO: Document the 1.4 and don't hardwire it in this routine */
    alpha = -1.4/emax;

    /* Prol <- Prol + alpha * D^{-1}A*Prol; damped-Jacobi smoothing of the prolongator */
    PetscCall(MatAYPX(tMat, alpha, Prol, SUBSET_NONZERO_PATTERN));
    PetscCall(MatDestroy(&Prol));
    Prol = tMat;
    PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_OPTSM],0,0,0,0));
  }
  PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_OPT],0,0,0,0));
  *a_P = Prol;
  PetscFunctionReturn(0);
}
1170 
1171 /* -------------------------------------------------------------------------- */
1172 /*
1173    PCCreateGAMG_AGG
1174 
1175   Input Parameter:
1176    . pc -
1177 */
1178 PetscErrorCode  PCCreateGAMG_AGG(PC pc)
1179 {
1180   PC_MG          *mg      = (PC_MG*)pc->data;
1181   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1182   PC_GAMG_AGG    *pc_gamg_agg;
1183 
1184   PetscFunctionBegin;
1185   /* create sub context for SA */
1186   PetscCall(PetscNewLog(pc,&pc_gamg_agg));
1187   pc_gamg->subctx = pc_gamg_agg;
1188 
1189   pc_gamg->ops->setfromoptions = PCSetFromOptions_GAMG_AGG;
1190   pc_gamg->ops->destroy        = PCDestroy_GAMG_AGG;
1191   /* reset does not do anything; setup not virtual */
1192 
1193   /* set internal function pointers */
1194   pc_gamg->ops->graph             = PCGAMGGraph_AGG;
1195   pc_gamg->ops->coarsen           = PCGAMGCoarsen_AGG;
1196   pc_gamg->ops->prolongator       = PCGAMGProlongator_AGG;
1197   pc_gamg->ops->optprolongator    = PCGAMGOptProlongator_AGG;
1198   pc_gamg->ops->createdefaultdata = PCSetData_AGG;
1199   pc_gamg->ops->view              = PCView_GAMG_AGG;
1200 
1201   pc_gamg_agg->square_graph = 1;
1202   pc_gamg_agg->sym_graph    = PETSC_FALSE;
1203   pc_gamg_agg->nsmooths     = 1;
1204 
1205   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNSmooths_C",PCGAMGSetNSmooths_AGG));
1206   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetSymGraph_C",PCGAMGSetSymGraph_AGG));
1207   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetSquareGraph_C",PCGAMGSetSquareGraph_AGG));
1208   PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_AGG));
1209   PetscFunctionReturn(0);
1210 }
1211