xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision efe053fc694c89641577543d44b4f7465f781c89)
1 /*
2  GAMG geometric-algebric multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/
7 
8 #if defined(PETSC_HAVE_CUDA)
9   #include <cuda_runtime.h>
10 #endif
11 
12 #if defined(PETSC_HAVE_HIP)
13   #include <hip/hip_runtime.h>
14 #endif
15 
16 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
17 PetscLogEvent petsc_gamg_setup_matmat_events[PETSC_MG_MAXLEVELS][3];
18 PetscLogEvent PC_GAMGGraph_AGG;
19 PetscLogEvent PC_GAMGGraph_GEO;
20 PetscLogEvent PC_GAMGCoarsen_AGG;
21 PetscLogEvent PC_GAMGCoarsen_GEO;
22 PetscLogEvent PC_GAMGProlongator_AGG;
23 PetscLogEvent PC_GAMGProlongator_GEO;
24 PetscLogEvent PC_GAMGOptProlongator_AGG;
25 
26 /* #define GAMG_STAGES */
27 #if defined(GAMG_STAGES)
28 static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
29 #endif
30 
31 static PetscFunctionList GAMGList = NULL;
32 static PetscBool PCGAMGPackageInitialized;
33 
34 /* ----------------------------------------------------------------------------- */
35 PetscErrorCode PCReset_GAMG(PC pc)
36 {
37   PetscErrorCode ierr, level;
38   PC_MG          *mg      = (PC_MG*)pc->data;
39   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
40 
41   PetscFunctionBegin;
42   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
43   pc_gamg->data_sz = 0;
44   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
45   for (level = 0; level < PETSC_MG_MAXLEVELS ; level++) {
46     mg->min_eigen_DinvA[level] = 0;
47     mg->max_eigen_DinvA[level] = 0;
48   }
49   pc_gamg->emin = 0;
50   pc_gamg->emax = 0;
51   PetscFunctionReturn(0);
52 }
53 
54 /* -------------------------------------------------------------------------- */
55 /*
56    PCGAMGCreateLevel_GAMG: create coarse op with RAP.  repartition and/or reduce number
57      of active processors.
58 
59    Input Parameter:
60    . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
61           'pc_gamg->data_sz' are changed via repartitioning/reduction.
62    . Amat_fine - matrix on this fine (k) level
63    . cr_bs - coarse block size
64    In/Output Parameter:
65    . a_P_inout - prolongation operator to the next level (k-->k-1)
66    . a_nactive_proc - number of active procs
67    Output Parameter:
68    . a_Amat_crs - coarse matrix that is created (k-1)
69 */
70 
71 static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
72 {
73   PetscErrorCode  ierr;
74   PC_MG           *mg         = (PC_MG*)pc->data;
75   PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
76   Mat             Cmat,Pold=*a_P_inout;
77   MPI_Comm        comm;
78   PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
79   PetscInt        ncrs_eq,ncrs,f_bs;
80 
81   PetscFunctionBegin;
82   ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
83   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
84   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
85   ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
86   ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
87   ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
88   ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
89 
90   if (Pcolumnperm) *Pcolumnperm = NULL;
91 
92   /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
93   ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
94   if (pc_gamg->data_cell_rows>0) {
95     ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
96   } else {
97     PetscInt  bs;
98     ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
99     ncrs = ncrs_eq/bs;
100   }
101   /* get number of PEs to make active 'new_size', reduce, can be any integer 1-P */
102   if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* 0 turns reducing to 1 process/device on; do for HIP, etc. */
103 #if defined(PETSC_HAVE_CUDA)
104     PetscShmComm pshmcomm;
105     PetscMPIInt  locrank;
106     MPI_Comm     loccomm;
107     PetscInt     s_nnodes,r_nnodes, new_new_size;
108     cudaError_t  cerr;
109     int          devCount;
110     ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
111     ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
112     ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRMPI(ierr);
113     s_nnodes = !locrank;
114     ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
115     PetscCheckFalse(size%r_nnodes,PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of nodes np=%D nnodes%D",size,r_nnodes);
116     devCount = 0;
117     cerr = cudaGetDeviceCount(&devCount);
118     cudaGetLastError(); /* Reset the last error */
119     if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
120       new_new_size = r_nnodes * devCount;
121       new_size = new_new_size;
122       ierr = PetscInfo(pc,"Fine grid with Cuda. %D nodes. Change new active set size %d --> %d (devCount=%d #nodes=%D)\n",r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
123     } else {
124       ierr = PetscInfo(pc,"With Cuda but no device. Use heuristics.");CHKERRQ(ierr);
125       goto HEURISTIC;
126     }
127 #else
128     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
129 #endif
130   } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
131     PetscCheckFalse(nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level],PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of active process %D wrt reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
132     new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
133     ierr = PetscInfo(pc,"Manually setting reduction to %d active processes (%d/%D)\n",new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
134   } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
135     new_size = 1;
136     ierr = PetscInfo(pc,"Force coarsest grid reduction to %d active processes\n",new_size);CHKERRQ(ierr);
137   } else {
138     PetscInt ncrs_eq_glob;
139 #if defined(PETSC_HAVE_CUDA)
140     HEURISTIC:
141 #endif
142     ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
143     new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
144     if (!new_size) new_size = 1; /* not likely, posible? */
145     else if (new_size >= nactive) new_size = nactive; /* no change, rare */
146     ierr = PetscInfo(pc,"Coarse grid reduction from %d to %d active processes\n",nactive,new_size);CHKERRQ(ierr);
147   }
148   if (new_size==nactive) {
149     *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
150     if (new_size < size) {
151       /* odd case where multiple coarse grids are on one processor or no coarsening ... */
152       ierr = PetscInfo(pc,"reduced grid using same number of processors (%d) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
153       if (pc_gamg->cpu_pin_coarse_grids) {
154         ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
155         ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
156       }
157     }
158     /* we know that the grid structure can be reused in MatPtAP */
159   } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
160     PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
161     IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
162     nloc_old = ncrs_eq/cr_bs;
163     PetscCheckFalse(ncrs_eq % cr_bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
164     /* get new_size and rfactor */
165     if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
166       /* find factor */
167       if (new_size == 1) rfactor = size; /* don't modify */
168       else {
169         PetscReal best_fact = 0.;
170         jj = -1;
171         for (kk = 1 ; kk <= size ; kk++) {
172           if (!(size%kk)) { /* a candidate */
173             PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
174             if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
175             if (fact > best_fact) {
176               best_fact = fact; jj = kk;
177             }
178           }
179         }
180         if (jj != -1) rfactor = jj;
181         else rfactor = 1; /* a prime */
182         if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
183         else expand_factor = rfactor;
184       }
185       new_size = size/rfactor; /* make new size one that is factor */
186       if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
187         *a_Amat_crs = Cmat;
188         ierr = PetscInfo(pc,"Finding factorable processor set stopped reduction: new_size=%d, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
189         PetscFunctionReturn(0);
190       }
191     }
192     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
193     /* make 'is_eq_newproc' */
194     ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
195     if (pc_gamg->repart) {
196       /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
197       Mat      adj;
198       ierr = PetscInfo(pc,"Repartition: size (active): %d --> %d, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
199       /* get 'adj' */
200       if (cr_bs == 1) {
201         ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
202       } else {
203         /* make a scalar matrix to partition (no Stokes here) */
204         Mat               tMat;
205         PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
206         const PetscScalar *vals;
207         const PetscInt    *idx;
208         PetscInt          *d_nnz, *o_nnz, M, N;
209         static PetscInt   llev = 0; /* ugly but just used for debugging */
210         MatType           mtype;
211 
212         ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
213         ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
214         ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
215         for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
216           ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
217           d_nnz[jj] = ncols/cr_bs;
218           o_nnz[jj] = ncols/cr_bs;
219           ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
220           if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
221           if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
222         }
223 
224         ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
225         ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
226         ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
227         ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
228         ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
229         ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
230         ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
231 
232         for (ii = Istart_crs; ii < Iend_crs; ii++) {
233           PetscInt dest_row = ii/cr_bs;
234           ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
235           for (jj = 0; jj < ncols; jj++) {
236             PetscInt    dest_col = idx[jj]/cr_bs;
237             PetscScalar v        = 1.0;
238             ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
239           }
240           ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
241         }
242         ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
243         ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
244 
245         if (llev++ == -1) {
246           PetscViewer viewer; char fname[32];
247           ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
248           PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);
249           ierr = MatView(tMat, viewer);CHKERRQ(ierr);
250           ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
251         }
252         ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
253         ierr = MatDestroy(&tMat);CHKERRQ(ierr);
254       } /* create 'adj' */
255 
256       { /* partition: get newproc_idx */
257         char            prefix[256];
258         const char      *pcpre;
259         const PetscInt  *is_idx;
260         MatPartitioning mpart;
261         IS              proc_is;
262 
263         ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
264         ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
265         ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
266         ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
267         ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
268         ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
269         ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
270         ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
271         ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
272 
273         /* collect IS info */
274         ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
275         ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
276         for (kk = jj = 0 ; kk < nloc_old ; kk++) {
277           for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
278             newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
279           }
280         }
281         ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
282         ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
283       }
284       ierr = MatDestroy(&adj);CHKERRQ(ierr);
285 
286       ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
287       ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
288     } else { /* simple aggregation of parts -- 'is_eq_newproc' */
289       PetscInt targetPE;
290       PetscCheckFalse(new_size==nactive,PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
291       ierr = PetscInfo(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
292       targetPE = (rank/rfactor)*expand_factor;
293       ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
294     } /* end simple 'is_eq_newproc' */
295 
296     /*
297       Create an index set from the is_eq_newproc index set to indicate the mapping TO
298     */
299     ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
300     is_eq_num_prim = is_eq_num;
301     /*
302       Determine how many equations/vertices are assigned to each processor
303     */
304     ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
305     ncrs_eq_new = counts[rank];
306     ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
307     ncrs_new = ncrs_eq_new/cr_bs;
308 
309     ierr = PetscFree(counts);CHKERRQ(ierr);
310     /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxilary data into some complex abstracted thing */
311     {
312       Vec            src_crd, dest_crd;
313       const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
314       VecScatter     vecscat;
315       PetscScalar    *array;
316       IS isscat;
317       /* move data (for primal equations only) */
318       /* Create a vector to contain the newly ordered element information */
319       ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
320       ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
321       ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
322       /*
323         There are 'ndata_rows*ndata_cols' data items per node, (one can think of the vectors of having
324         a block size of ...).  Note, ISs are expanded into equation space by 'cr_bs'.
325       */
326       ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
327       ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
328       for (ii=0,jj=0; ii<ncrs; ii++) {
329         PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
330         for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
331       }
332       ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
333       ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
334       ierr = PetscFree(tidx);CHKERRQ(ierr);
335       /*
336         Create a vector to contain the original vertex information for each element
337       */
338       ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
339       for (jj=0; jj<ndata_cols; jj++) {
340         const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
341         for (ii=0; ii<ncrs; ii++) {
342           for (kk=0; kk<ndata_rows; kk++) {
343             PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
344             PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
345             ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
346           }
347         }
348       }
349       ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
350       ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
351       /*
352         Scatter the element vertex information (still in the original vertex ordering)
353         to the correct processor
354       */
355       ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
356       ierr = ISDestroy(&isscat);CHKERRQ(ierr);
357       ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
358       ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
359       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
360       ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
361       /*
362         Put the element vertex data into a new allocation of the gdata->ele
363       */
364       ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
365       ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);
366 
367       pc_gamg->data_sz = node_data_sz*ncrs_new;
368       strideNew        = ncrs_new*ndata_rows;
369 
370       ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
371       for (jj=0; jj<ndata_cols; jj++) {
372         for (ii=0; ii<ncrs_new; ii++) {
373           for (kk=0; kk<ndata_rows; kk++) {
374             PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
375             pc_gamg->data[ix] = PetscRealPart(array[jx]);
376           }
377         }
378       }
379       ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
380       ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
381     }
382     /* move A and P (columns) with new layout */
383     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
384     /*
385       Invert for MatCreateSubMatrix
386     */
387     ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
388     ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
389     ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
390     if (is_eq_num != is_eq_num_prim) {
391       ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
392     }
393     if (Pcolumnperm) {
394       ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
395       *Pcolumnperm = new_eq_indices;
396     }
397     ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
398     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
399     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
400     /* 'a_Amat_crs' output */
401     {
402       Mat       mat;
403       PetscBool flg;
404       ierr = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
405       ierr = MatGetOption(Cmat, MAT_SPD, &flg);CHKERRQ(ierr);
406       if (flg) {
407         ierr = MatSetOption(mat, MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
408       } else {
409         ierr = MatGetOption(Cmat, MAT_HERMITIAN, &flg);CHKERRQ(ierr);
410         if (flg) {
411           ierr = MatSetOption(mat, MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
412         } else {
413 #if !defined(PETSC_USE_COMPLEX)
414           ierr = MatGetOption(Cmat, MAT_SYMMETRIC, &flg);CHKERRQ(ierr);
415           if (flg) {
416             ierr = MatSetOption(mat, MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
417           }
418 #endif
419         }
420       }
421       *a_Amat_crs = mat;
422     }
423     ierr = MatDestroy(&Cmat);CHKERRQ(ierr);
424 
425     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
426     /* prolongator */
427     {
428       IS       findices;
429       PetscInt Istart,Iend;
430       Mat      Pnew;
431 
432       ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
433       ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
434       ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
435       ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
436       ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
437       ierr = ISDestroy(&findices);CHKERRQ(ierr);
438       ierr = MatSetOption(Pnew,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
439 
440       ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
441       ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);
442 
443       /* output - repartitioned */
444       *a_P_inout = Pnew;
445     }
446     ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);
447 
448     *a_nactive_proc = new_size; /* output */
449 
450     /* pinning on reduced grids, not a bad heuristic and optimization gets folded into process reduction optimization */
451     if (pc_gamg->cpu_pin_coarse_grids) {
452 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
453       static PetscInt llev = 2;
454       ierr = PetscInfo(pc,"Pinning level %D to the CPU\n",llev++);CHKERRQ(ierr);
455 #endif
456       ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
457       ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
458       if (1) { /* HACK: move this to MatBindCPU_MPIAIJXXX; lvec is created, need to pin it, this is done in MatSetUpMultiply_MPIAIJ. Hack */
459         Mat         A = *a_Amat_crs, P = *a_P_inout;
460         PetscMPIInt size;
461         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
462         if (size > 1) {
463           Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
464           ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
465           ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
466         }
467       }
468     }
469     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
470   }
471   PetscFunctionReturn(0);
472 }
473 
474 PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
475 {
476   PetscErrorCode ierr;
477   const char     *prefix;
478   char           addp[32];
479   PC_MG          *mg      = (PC_MG*)a_pc->data;
480   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
481 
482   PetscFunctionBegin;
483   ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
484   ierr = PetscInfo(a_pc,"Square Graph on level %D\n",pc_gamg->current_level+1);CHKERRQ(ierr);
485   ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
486   ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
487   ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",pc_gamg->current_level);CHKERRQ(ierr);
488   ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
489   if ((*Gmat2)->structurally_symmetric) {
490     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AB);CHKERRQ(ierr);
491   } else {
492     ierr = MatSetOption(Gmat1,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
493     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
494   }
495   ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
496   ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
497   ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
498   ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
499   ierr = MatProductClear(*Gmat2);CHKERRQ(ierr);
500   /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
501   (*Gmat2)->assembled = PETSC_TRUE;
502   PetscFunctionReturn(0);
503 }
504 
505 /* -------------------------------------------------------------------------- */
506 /*
507    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
508                     by setting data structures and options.
509 
510    Input Parameter:
511 .  pc - the preconditioner context
512 
513 */
514 PetscErrorCode PCSetUp_GAMG(PC pc)
515 {
516   PetscErrorCode ierr;
517   PC_MG          *mg      = (PC_MG*)pc->data;
518   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
519   Mat            Pmat     = pc->pmat;
520   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
521   MPI_Comm       comm;
522   PetscMPIInt    rank,size,nactivepe;
523   Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
524   IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
525   PetscLogDouble nnz0=0.,nnztot=0.;
526   MatInfo        info;
527   PetscBool      is_last = PETSC_FALSE;
528 
529   PetscFunctionBegin;
530   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
531   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
532   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
533 
534   if (pc->setupcalled) {
535     if (!pc_gamg->reuse_prol || pc->flag == DIFFERENT_NONZERO_PATTERN) {
536       /* reset everything */
537       ierr = PCReset_MG(pc);CHKERRQ(ierr);
538       pc->setupcalled = 0;
539     } else {
540       PC_MG_Levels **mglevels = mg->levels;
541       /* just do Galerkin grids */
542       Mat          B,dA,dB;
543 
544       if (pc_gamg->Nlevels > 1) {
545         PetscInt gl;
546         /* currently only handle case where mat and pmat are the same on coarser levels */
547         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
548         /* (re)set to get dirty flag */
549         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
550 
551         for (level=pc_gamg->Nlevels-2,gl=0; level>=0; level--,gl++) {
552           MatReuse reuse = MAT_INITIAL_MATRIX ;
553 
554           /* matrix structure can change from repartitioning or process reduction but don't know if we have process reduction here. Should fix */
555           ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
556           if (B->product) {
557             if (B->product->A == dB && B->product->B == mglevels[level+1]->interpolate) {
558               reuse = MAT_REUSE_MATRIX;
559             }
560           }
561           if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr); }
562           if (reuse == MAT_REUSE_MATRIX) {
563             ierr = PetscInfo(pc,"RAP after first solve, reuse matrix level %D\n",level);CHKERRQ(ierr);
564           } else {
565             ierr = PetscInfo(pc,"RAP after first solve, new matrix level %D\n",level);CHKERRQ(ierr);
566           }
567           ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
568           ierr = MatPtAP(dB,mglevels[level+1]->interpolate,reuse,PETSC_DEFAULT,&B);CHKERRQ(ierr);
569           ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
570           if (reuse == MAT_INITIAL_MATRIX) mglevels[level]->A = B;
571           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
572           dB   = B;
573         }
574       }
575 
576       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
577       PetscFunctionReturn(0);
578     }
579   }
580 
581   if (!pc_gamg->data) {
582     if (pc_gamg->orig_data) {
583       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
584       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
585 
586       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
587       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
588       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
589 
590       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
591       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
592     } else {
593       PetscCheckFalse(!pc_gamg->ops->createdefaultdata,comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
594       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
595     }
596   }
597 
598   /* cache original data for reuse */
599   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
600     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
601     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
602     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
603     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
604   }
605 
606   /* get basic dims */
607   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
608   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
609 
610   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
611   nnz0   = info.nz_used;
612   nnztot = info.nz_used;
613   ierr = PetscInfo(pc,"level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
614 
615   /* Get A_i and R_i */
616   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
617     pc_gamg->current_level = level;
618     PetscCheckFalse(level >= PETSC_MG_MAXLEVELS,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
619     level1 = level + 1;
620     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
621 #if defined(GAMG_STAGES)
622     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
623 #endif
624     { /* construct prolongator */
625       Mat              Gmat;
626       PetscCoarsenData *agg_lists;
627       Mat              Prol11;
628 
629       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
630       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
631       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
632 
633       /* could have failed to create new level */
634       if (Prol11) {
635         const char *prefix;
636         char       addp[32];
637 
638         /* get new block size of coarse matrices */
639         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
640 
641         if (pc_gamg->ops->optprolongator) {
642           /* smooth */
643           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
644         }
645 
646         if (pc_gamg->use_aggs_in_asm) {
647           PetscInt bs;
648           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
649           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
650         }
651 
652         ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
653         ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
654         ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
655         ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
656         /* Always generate the transpose with CUDA
657            Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
658         ierr = MatSetOption(Prol11,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
659         ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
660         Parr[level1] = Prol11;
661       } else Parr[level1] = NULL; /* failed to coarsen */
662 
663       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
664       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
665     } /* construct prolongator scope */
666     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
667     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
668     if (!Parr[level1]) { /* failed to coarsen */
669       ierr = PetscInfo(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
670 #if defined(GAMG_STAGES)
671       ierr = PetscLogStagePop();CHKERRQ(ierr);
672 #endif
673       break;
674     }
675     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
676     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is next M, a loop test variables */
677     PetscCheckFalse(is_last,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Is last ?");
678     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
679     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
680     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
681 
682     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
683     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is loop test variables */
684     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
685     nnztot += info.nz_used;
686     ierr = PetscInfo(pc,"%D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
687 
688 #if defined(GAMG_STAGES)
689     ierr = PetscLogStagePop();CHKERRQ(ierr);
690 #endif
691     /* stop if one node or one proc -- could pull back for singular problems */
692     if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
693       ierr =  PetscInfo(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
694       level++;
695       break;
696     }
697   } /* levels */
698   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
699 
700   ierr = PetscInfo(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
701   pc_gamg->Nlevels = level + 1;
702   fine_level       = level;
703   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
704 
705   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
706 
707     /* set default smoothers & set operators */
708     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
709       KSP smoother;
710       PC  subpc;
711 
712       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
713       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
714 
715       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
716       /* set ops */
717       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
718       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
719 
720       /* set defaults */
721       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
722 
723       /* set blocks for ASM smoother that uses the 'aggregates' */
724       if (pc_gamg->use_aggs_in_asm) {
725         PetscInt sz;
726         IS       *iss;
727 
728         sz   = nASMBlocksArr[level];
729         iss  = ASMLocalIDsArr[level];
730         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
731         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
732         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
733         if (!sz) {
734           IS       is;
735           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
736           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
737           ierr = ISDestroy(&is);CHKERRQ(ierr);
738         } else {
739           PetscInt kk;
740           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
741           for (kk=0; kk<sz; kk++) {
742             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
743           }
744           ierr = PetscFree(iss);CHKERRQ(ierr);
745         }
746         ASMLocalIDsArr[level] = NULL;
747         nASMBlocksArr[level]  = 0;
748       } else {
749         ierr = PCSetType(subpc, PCJACOBI);CHKERRQ(ierr);
750       }
751     }
752     {
753       /* coarse grid */
754       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
755       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
756 
757       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
758       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
759       if (!pc_gamg->use_parallel_coarse_grid_solver) {
760         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
761         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
762         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
763         ierr = PCSetUp(subpc);CHKERRQ(ierr);
764         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
765         PetscCheckFalse(ii != 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
766         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
767         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
768         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
769         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
770         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
771       }
772     }
773 
774     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
775     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
776     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
777     ierr = PetscOptionsEnd();CHKERRQ(ierr);
778     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
779 
780     /* setup cheby eigen estimates from SA */
781     if (pc_gamg->use_sa_esteig) {
782       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
783         KSP       smoother;
784         PetscBool ischeb;
785 
786         ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
787         ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
788         if (ischeb) {
789           KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;
790 
791           // The command line will override these settings because KSPSetFromOptions is called in PCSetUp_MG
792           if (mg->max_eigen_DinvA[level] > 0) {
793             // SA uses Jacobi for P; we use SA estimates if the smoother is also Jacobi or if the user explicitly requested it.
794             // TODO: This should test whether it's the same Jacobi variant (DIAG, ROWSUM, etc.)
795             PetscReal emax,emin;
796 
797             emin = mg->min_eigen_DinvA[level];
798             emax = mg->max_eigen_DinvA[level];
799             ierr = PetscInfo(pc,"PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
800             cheb->emin_provided = emin;
801             cheb->emax_provided = emax;
802           }
803         }
804       }
805     }
806 
807     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
808 
809     /* clean up */
810     for (level=1; level<pc_gamg->Nlevels; level++) {
811       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
812       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
813     }
814   } else {
815     KSP smoother;
816 
817     ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
818     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
819     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
820     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
821     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 /* ------------------------------------------------------------------------- */
827 /*
828  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
829    that was created with PCCreate_GAMG().
830 
831    Input Parameter:
832 .  pc - the preconditioner context
833 
834    Application Interface Routine: PCDestroy()
835 */
836 PetscErrorCode PCDestroy_GAMG(PC pc)
837 {
838   PetscErrorCode ierr;
839   PC_MG          *mg     = (PC_MG*)pc->data;
840   PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;
841 
842   PetscFunctionBegin;
843   ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
844   if (pc_gamg->ops->destroy) {
845     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
846   }
847   ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
848   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
849   ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
850   ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
851   PetscFunctionReturn(0);
852 }
853 
854 /*@
855    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
856 
857    Logically Collective on PC
858 
859    Input Parameters:
860 +  pc - the preconditioner context
861 -  n - the number of equations
862 
863    Options Database Key:
864 .  -pc_gamg_process_eq_limit <limit>
865 
866    Notes:
867     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
868           that has degrees of freedom
869 
870    Level: intermediate
871 
872 .seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
873 @*/
874 PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
875 {
876   PetscErrorCode ierr;
877 
878   PetscFunctionBegin;
879   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
880   ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
881   PetscFunctionReturn(0);
882 }
883 
884 static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
885 {
886   PC_MG   *mg      = (PC_MG*)pc->data;
887   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
888 
889   PetscFunctionBegin;
890   if (n>0) pc_gamg->min_eq_proc = n;
891   PetscFunctionReturn(0);
892 }
893 
894 /*@
895    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
896 
897  Collective on PC
898 
899    Input Parameters:
900 +  pc - the preconditioner context
901 -  n - maximum number of equations to aim for
902 
903    Options Database Key:
904 .  -pc_gamg_coarse_eq_limit <limit>
905 
906    Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
907      has less than 1000 unknowns.
908 
909    Level: intermediate
910 
911 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
912 @*/
913 PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
914 {
915   PetscErrorCode ierr;
916 
917   PetscFunctionBegin;
918   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
919   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
920   PetscFunctionReturn(0);
921 }
922 
923 static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
924 {
925   PC_MG   *mg      = (PC_MG*)pc->data;
926   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
927 
928   PetscFunctionBegin;
929   if (n>0) pc_gamg->coarse_eq_limit = n;
930   PetscFunctionReturn(0);
931 }
932 
933 /*@
934    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
935 
936    Collective on PC
937 
938    Input Parameters:
939 +  pc - the preconditioner context
940 -  n - PETSC_TRUE or PETSC_FALSE
941 
942    Options Database Key:
943 .  -pc_gamg_repartition <true,false>
944 
945    Notes:
946     this will generally improve the loading balancing of the work on each level
947 
948    Level: intermediate
949 
950 .seealso: ()
951 @*/
952 PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
953 {
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
958   ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
963 {
964   PC_MG   *mg      = (PC_MG*)pc->data;
965   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
966 
967   PetscFunctionBegin;
968   pc_gamg->repart = n;
969   PetscFunctionReturn(0);
970 }
971 
972 /*@
973    PCGAMGSetEstEigKSPMaxIt - Set number of KSP iterations in eigen estimator (for Cheby)
974 
975    Collective on PC
976 
977    Input Parameters:
978 +  pc - the preconditioner context
979 -  n - number of its
980 
981    Options Database Key:
982 .  -pc_gamg_esteig_ksp_max_it <its>
983 
984    Notes:
985 
986    Level: intermediate
987 
988 .seealso: ()
989 @*/
990 PetscErrorCode PCGAMGSetEstEigKSPMaxIt(PC pc, PetscInt n)
991 {
992   PetscErrorCode ierr;
993 
994   PetscFunctionBegin;
995   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
996   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPMaxIt_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 static PetscErrorCode PCGAMGSetEstEigKSPMaxIt_GAMG(PC pc, PetscInt n)
1001 {
1002   PC_MG   *mg      = (PC_MG*)pc->data;
1003   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1004 
1005   PetscFunctionBegin;
1006   pc_gamg->esteig_max_it = n;
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 /*@
1011    PCGAMGSetUseSAEstEig - Use eigen estimate from smoothed aggregation for Chebyshev smoother
1012 
1013    Collective on PC
1014 
1015    Input Parameters:
1016 +  pc - the preconditioner context
1017 -  n - number of its
1018 
1019    Options Database Key:
1020 .  -pc_gamg_use_sa_esteig <true,false>
1021 
1022    Notes:
1023    Smoothed aggregation constructs the smoothed prolongator $P = (I - \omega D^{-1} A) T$ where $T$ is the tentative prolongator and $D$ is the diagonal of $A$.
1024    Eigenvalue estimates (based on a few CG or GMRES iterations) are computed to choose $\omega$ so that this is a stable smoothing operation.
1025    If Chebyshev with Jacobi (diagonal) preconditioning is used for smoothing, then the eigenvalue estimates can be reused.
1026    This option is only used when the smoother uses Jacobi, and should be turned off if a different PCJacobiType is used.
1027    It became default in PETSc 3.17.
1028 
1029    Level: advanced
1030 
1031 .seealso: PCGAMGSetEstEigKSPType(), KSPChebyshevSetEigenvalues(), KSPChebyshevEstEigSet()
1032 @*/
1033 PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
1034 {
1035   PetscErrorCode ierr;
1036 
1037   PetscFunctionBegin;
1038   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1039   ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
1044 {
1045   PC_MG   *mg      = (PC_MG*)pc->data;
1046   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1047 
1048   PetscFunctionBegin;
1049   pc_gamg->use_sa_esteig = n;
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 /*@C
1054    PCGAMGSetEstEigKSPType - Set type of KSP in eigen estimator (for Cheby)
1055 
1056    Collective on PC
1057 
1058    Input Parameters:
1059 +  pc - the preconditioner context
1060 -  t - ksp type
1061 
1062    Options Database Key:
1063 .  -pc_gamg_esteig_ksp_type <type>
1064 
1065    Notes:
1066 
1067    Level: intermediate
1068 
1069 .seealso: ()
1070 @*/
1071 PetscErrorCode PCGAMGSetEstEigKSPType(PC pc, char t[])
1072 {
1073   PetscErrorCode ierr;
1074 
1075   PetscFunctionBegin;
1076   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1077   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPType_C",(PC,char[]),(pc,t));CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 static PetscErrorCode PCGAMGSetEstEigKSPType_GAMG(PC pc, char t[])
1082 {
1083   PetscErrorCode ierr;
1084   PC_MG   *mg      = (PC_MG*)pc->data;
1085   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1086 
1087   PetscFunctionBegin;
1088   ierr = PetscStrcpy(pc_gamg->esteig_type,t);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /*@
1093    PCGAMGSetEigenvalues - Set eigenvalues
1094 
1095    Collective on PC
1096 
1097    Input Parameters:
1098 +  pc - the preconditioner context
1099 -  emax - max eigenvalue
1100 .  emin - min eigenvalue
1101 
1102    Options Database Key:
1103 .  -pc_gamg_eigenvalues
1104 
1105    Level: intermediate
1106 
1107 .seealso: PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPType()
1108 @*/
1109 PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
1110 {
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1115   ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
1120 {
1121   PC_MG          *mg      = (PC_MG*)pc->data;
1122   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1123 
1124   PetscFunctionBegin;
1125   PetscCheckFalse(emax <= emin,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
1126   PetscCheckFalse(emax*emin <= 0.0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
1127   pc_gamg->emax = emax;
1128   pc_gamg->emin = emin;
1129 
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 /*@
1134    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
1135 
1136    Collective on PC
1137 
1138    Input Parameters:
1139 +  pc - the preconditioner context
1140 -  n - PETSC_TRUE or PETSC_FALSE
1141 
1142    Options Database Key:
1143 .  -pc_gamg_reuse_interpolation <true,false>
1144 
1145    Level: intermediate
1146 
1147    Notes:
1148     this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
1149           rebuilding the preconditioner quicker.
1150 
1151 .seealso: ()
1152 @*/
1153 PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
1154 {
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1159   ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
1164 {
1165   PC_MG   *mg      = (PC_MG*)pc->data;
1166   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1167 
1168   PetscFunctionBegin;
1169   pc_gamg->reuse_prol = n;
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 /*@
1174    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
1175 
1176    Collective on PC
1177 
1178    Input Parameters:
1179 +  pc - the preconditioner context
1180 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
1181 
1182    Options Database Key:
1183 .  -pc_gamg_asm_use_agg
1184 
1185    Level: intermediate
1186 
1187 .seealso: ()
1188 @*/
1189 PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
1190 {
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1195   ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
1200 {
1201   PC_MG   *mg      = (PC_MG*)pc->data;
1202   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1203 
1204   PetscFunctionBegin;
1205   pc_gamg->use_aggs_in_asm = flg;
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 /*@
1210    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
1211 
1212    Collective on PC
1213 
1214    Input Parameters:
1215 +  pc - the preconditioner context
1216 -  flg - PETSC_TRUE to not force coarse grid onto one processor
1217 
1218    Options Database Key:
1219 .  -pc_gamg_use_parallel_coarse_grid_solver
1220 
1221    Level: intermediate
1222 
1223 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
1224 @*/
1225 PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
1226 {
1227   PetscErrorCode ierr;
1228 
1229   PetscFunctionBegin;
1230   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1231   ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
1236 {
1237   PC_MG   *mg      = (PC_MG*)pc->data;
1238   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1239 
1240   PetscFunctionBegin;
1241   pc_gamg->use_parallel_coarse_grid_solver = flg;
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 /*@
1246    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
1247 
1248    Collective on PC
1249 
1250    Input Parameters:
1251 +  pc - the preconditioner context
1252 -  flg - PETSC_TRUE to pin coarse grids to CPU
1253 
1254    Options Database Key:
1255 .  -pc_gamg_cpu_pin_coarse_grids
1256 
1257    Level: intermediate
1258 
1259 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
1260 @*/
1261 PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
1262 {
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1267   ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
1272 {
1273   PC_MG   *mg      = (PC_MG*)pc->data;
1274   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1275 
1276   PetscFunctionBegin;
1277   pc_gamg->cpu_pin_coarse_grids = flg;
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 /*@
1282    PCGAMGSetCoarseGridLayoutType - place reduce grids on processors with natural order (compact type)
1283 
1284    Collective on PC
1285 
1286    Input Parameters:
1287 +  pc - the preconditioner context
1288 -  flg - Layout type
1289 
1290    Options Database Key:
1291 .  -pc_gamg_coarse_grid_layout_type
1292 
1293    Level: intermediate
1294 
1295 .seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
1296 @*/
1297 PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
1298 {
1299   PetscErrorCode ierr;
1300 
1301   PetscFunctionBegin;
1302   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1303   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
1308 {
1309   PC_MG   *mg      = (PC_MG*)pc->data;
1310   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1311 
1312   PetscFunctionBegin;
1313   pc_gamg->layout_type = flg;
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 /*@
1318    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1319 
1320    Not collective on PC
1321 
1322    Input Parameters:
1323 +  pc - the preconditioner
1324 -  n - the maximum number of levels to use
1325 
1326    Options Database Key:
1327 .  -pc_mg_levels
1328 
1329    Level: intermediate
1330 
1331 .seealso: ()
1332 @*/
1333 PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
1334 {
1335   PetscErrorCode ierr;
1336 
1337   PetscFunctionBegin;
1338   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1339   ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1344 {
1345   PC_MG   *mg      = (PC_MG*)pc->data;
1346   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1347 
1348   PetscFunctionBegin;
1349   pc_gamg->Nlevels = n;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 /*@
1354    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1355 
1356    Not collective on PC
1357 
1358    Input Parameters:
1359 +  pc - the preconditioner context
1360 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1361 -  n - number of threshold values provided in array
1362 
1363    Options Database Key:
1364 .  -pc_gamg_threshold <threshold>
1365 
1366    Notes:
1367     Increasing the threshold decreases the rate of coarsening. Conversely reducing the threshold increases the rate of coarsening (aggressive coarsening) and thereby reduces the complexity of the coarse grids, and generally results in slower solver converge rates. Reducing coarse grid complexity reduced the complexity of Galerkin coarse grid construction considerably.
1368     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, and thus reducing the coupling in the graph and a different (perhaps better) coarser set of points.
1369 
1370     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1371     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1372     If n is greater than the total number of levels, the excess entries in threshold will not be used.
1373 
1374    Level: intermediate
1375 
1376 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1377 @*/
1378 PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
1379 {
1380   PetscErrorCode ierr;
1381 
1382   PetscFunctionBegin;
1383   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1384   if (n) PetscValidRealPointer(v,2);
1385   ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1390 {
1391   PC_MG   *mg      = (PC_MG*)pc->data;
1392   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1393   PetscInt i;
1394   PetscFunctionBegin;
1395   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1396   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 /*@
1401    PCGAMGSetRankReductionFactors - Set manual schedual for process reduction on coarse grids
1402 
1403    Collective on PC
1404 
1405    Input Parameters:
1406 +  pc - the preconditioner context
1407 .  v - array of reduction factors. 0 for fist value forces a reduction to one process/device on first level in Cuda
1408 -  n - number of values provided in array
1409 
1410    Options Database Key:
1411 .  -pc_gamg_rank_reduction_factors <factors>
1412 
1413    Level: intermediate
1414 
1415 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1416 @*/
1417 PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
1418 {
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1423   if (n) PetscValidIntPointer(v,2);
1424   ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
1429 {
1430   PC_MG   *mg      = (PC_MG*)pc->data;
1431   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1432   PetscInt i;
1433   PetscFunctionBegin;
1434   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
1435   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* 0 stop putting one process/device on first level */
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 /*@
1440    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1441 
1442    Not collective on PC
1443 
1444    Input Parameters:
1445 +  pc - the preconditioner context
1446 -  scale - the threshold value reduction, ussually < 1.0
1447 
1448    Options Database Key:
1449 .  -pc_gamg_threshold_scale <v>
1450 
1451    Notes:
1452    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1453    This scaling is used for each subsequent coarsening, but must be called before PCGAMGSetThreshold().
1454 
1455    Level: advanced
1456 
1457 .seealso: PCGAMGSetThreshold()
1458 @*/
1459 PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
1460 {
1461   PetscErrorCode ierr;
1462 
1463   PetscFunctionBegin;
1464   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1465   ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1470 {
1471   PC_MG   *mg      = (PC_MG*)pc->data;
1472   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1473   PetscFunctionBegin;
1474   pc_gamg->threshold_scale = v;
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 /*@C
1479    PCGAMGSetType - Set solution method
1480 
1481    Collective on PC
1482 
1483    Input Parameters:
1484 +  pc - the preconditioner context
1485 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1486 
1487    Options Database Key:
1488 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1489 
1490    Level: intermediate
1491 
1492 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1493 @*/
1494 PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
1495 {
1496   PetscErrorCode ierr;
1497 
1498   PetscFunctionBegin;
1499   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1500   ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 /*@C
1505    PCGAMGGetType - Get solution method
1506 
1507    Collective on PC
1508 
1509    Input Parameter:
1510 .  pc - the preconditioner context
1511 
1512    Output Parameter:
1513 .  type - the type of algorithm used
1514 
1515    Level: intermediate
1516 
1517 .seealso: PCGAMGSetType(), PCGAMGType
1518 @*/
1519 PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
1520 {
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1525   ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1530 {
1531   PC_MG          *mg      = (PC_MG*)pc->data;
1532   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1533 
1534   PetscFunctionBegin;
1535   *type = pc_gamg->type;
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
1540 {
1541   PetscErrorCode ierr,(*r)(PC);
1542   PC_MG          *mg      = (PC_MG*)pc->data;
1543   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1544 
1545   PetscFunctionBegin;
1546   pc_gamg->type = type;
1547   ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
1548   PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
1549   if (pc_gamg->ops->destroy) {
1550     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
1551     ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
1552     pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1553     /* cleaning up common data in pc_gamg - this should disapear someday */
1554     pc_gamg->data_cell_cols = 0;
1555     pc_gamg->data_cell_rows = 0;
1556     pc_gamg->orig_data_cell_cols = 0;
1557     pc_gamg->orig_data_cell_rows = 0;
1558     ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
1559     pc_gamg->data_sz = 0;
1560   }
1561   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
1562   ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
1563   ierr = (*r)(pc);CHKERRQ(ierr);
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1568 {
1569   PetscErrorCode ierr,i;
1570   PC_MG          *mg      = (PC_MG*)pc->data;
1571   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1572   PetscReal       gc=0, oc=0;
1573 
1574   PetscFunctionBegin;
1575   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1576   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1577   for (i=0;i<mg->nlevels; i++) {
1578     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1579   }
1580   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1581   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1582   if (pc_gamg->use_aggs_in_asm) {
1583     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1584   }
1585   if (pc_gamg->use_parallel_coarse_grid_solver) {
1586     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1587   }
1588 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1589   if (pc_gamg->cpu_pin_coarse_grids) {
1590     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1591   }
1592 #endif
1593   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1594   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1595   /* } else { */
1596   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1597   /* } */
1598   if (pc_gamg->ops->view) {
1599     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1600   }
1601   ierr = PCMGGetGridComplexity(pc,&gc,&oc);CHKERRQ(ierr);
1602   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g    operator = %g\n",gc,oc);CHKERRQ(ierr);
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1607 {
1608   PetscErrorCode ierr;
1609   PC_MG          *mg      = (PC_MG*)pc->data;
1610   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1611   PetscBool      flag;
1612   MPI_Comm       comm;
1613   char           prefix[256],tname[32];
1614   PetscInt       i,n;
1615   const char     *pcpre;
1616   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1617   PetscFunctionBegin;
1618   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1619   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1620   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1621   if (flag) {
1622     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1623   }
1624   ierr = PetscOptionsFList("-pc_gamg_esteig_ksp_type","Krylov method for eigen estimator","PCGAMGSetEstEigKSPType",KSPList,pc_gamg->esteig_type,tname,sizeof(tname),&flag);CHKERRQ(ierr);
1625   if (flag) {
1626     ierr = PCGAMGSetEstEigKSPType(pc,tname);CHKERRQ(ierr);
1627   }
1628   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1629   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use eigen estimate from Smoothed aggregation for smoother","PCGAMGSetUseSAEstEig",pc_gamg->use_sa_esteig,&pc_gamg->use_sa_esteig,NULL);CHKERRQ(ierr);
1630   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1631   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1632   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1633   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1634   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1635   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1636   ierr = PetscOptionsInt("-pc_gamg_esteig_ksp_max_it","Number of iterations of eigen estimator","PCGAMGSetEstEigKSPMaxIt",pc_gamg->esteig_max_it,&pc_gamg->esteig_max_it,NULL);CHKERRQ(ierr);
1637   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1638   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1639   n = PETSC_MG_MAXLEVELS;
1640   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1641   if (!flag || n < PETSC_MG_MAXLEVELS) {
1642     if (!flag) n = 1;
1643     i = n;
1644     do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_MG_MAXLEVELS);
1645   }
1646   n = PETSC_MG_MAXLEVELS;
1647   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (0 for first reduction puts one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1648   if (!flag) i = 0;
1649   else i = n;
1650   do {pc_gamg->level_reduction_factors[i] = -1;} while (++i<PETSC_MG_MAXLEVELS);
1651   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1652   {
1653     PetscReal eminmax[2] = {0., 0.};
1654     n = 2;
1655     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1656     if (flag) {
1657       PetscCheckFalse(n != 2,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1658       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr);
1659     }
1660   }
1661   /* set options for subtype */
1662   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1663 
1664   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1665   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1666   ierr = PetscOptionsTail();CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 /* -------------------------------------------------------------------------- */
1671 /*MC
1672      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1673 
1674    Options Database Keys:
1675 +   -pc_gamg_type <type> - one of agg, geo, or classical
1676 .   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom accross the coarse grids as they are determined
1677 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
1678 .   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coasening process to defined the subdomains on each level for the PCASM smoother
1679 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1680                                         equations on each process that has degrees of freedom
1681 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1682 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1683 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1684 
1685    Options Database Keys for default Aggregation:
1686 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1687 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1688 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1689 
1690    Multigrid options:
1691 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1692 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
1693 .  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kascade
1694 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1695 
1696   Notes:
1697     In order to obtain good performance for PCGAMG for vector valued problems you must
1698        Call MatSetBlockSize() to indicate the number of degrees of freedom per grid point
1699        Call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator
1700        See the Users Manual Chapter 4 for more details
1701 
1702   Level: intermediate
1703 
1704 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1705            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetEstEigKSPType()
1706 M*/
1707 
1708 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1709 {
1710   PetscErrorCode ierr,i;
1711   PC_GAMG        *pc_gamg;
1712   PC_MG          *mg;
1713 
1714   PetscFunctionBegin;
1715    /* register AMG type */
1716   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1717 
1718   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1719   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1720   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1721 
1722   /* create a supporting struct and attach it to pc */
1723   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1724   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1725   mg           = (PC_MG*)pc->data;
1726   mg->innerctx = pc_gamg;
1727 
1728   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1729 
1730   /* these should be in subctx but repartitioning needs simple arrays */
1731   pc_gamg->data_sz = 0;
1732   pc_gamg->data    = NULL;
1733 
1734   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1735   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1736   pc->ops->setup          = PCSetUp_GAMG;
1737   pc->ops->reset          = PCReset_GAMG;
1738   pc->ops->destroy        = PCDestroy_GAMG;
1739   mg->view                = PCView_GAMG;
1740 
1741   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1742   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1743   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1744   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1745   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1746   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPType_C",PCGAMGSetEstEigKSPType_GAMG);CHKERRQ(ierr);
1747   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPMaxIt_C",PCGAMGSetEstEigKSPMaxIt_GAMG);CHKERRQ(ierr);
1748   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1749   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1750   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1751   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1752   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1753   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1754   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1755   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1756   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1757   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1758   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1759   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1760   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1761   pc_gamg->repart           = PETSC_FALSE;
1762   pc_gamg->reuse_prol       = PETSC_FALSE;
1763   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1764   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1765   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1766   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1767   pc_gamg->min_eq_proc      = 50;
1768   pc_gamg->coarse_eq_limit  = 50;
1769   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1770   pc_gamg->threshold_scale = 1.;
1771   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1772   pc_gamg->current_level    = 0; /* don't need to init really */
1773   ierr = PetscStrcpy(pc_gamg->esteig_type,NULL);CHKERRQ(ierr);
1774   pc_gamg->esteig_max_it    = 10;
1775   pc_gamg->use_sa_esteig    = PETSC_TRUE;
1776   pc_gamg->emin             = 0;
1777   pc_gamg->emax             = 0;
1778 
1779   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1780 
1781   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1782   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 /*@C
1787  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1788     from PCInitializePackage().
1789 
1790  Level: developer
1791 
1792  .seealso: PetscInitialize()
1793 @*/
1794 PetscErrorCode PCGAMGInitializePackage(void)
1795 {
1796   PetscErrorCode ierr;
1797   PetscInt       l;
1798 
1799   PetscFunctionBegin;
1800   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1801   PCGAMGPackageInitialized = PETSC_TRUE;
1802   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1803   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1804   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1805   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1806 
1807   /* general events */
1808   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", 0, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1809   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1810   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1811   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1812   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1813   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1814   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1815 
1816   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1817   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1818   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1819   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1820   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1821   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1822   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1823   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1824   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1825   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1826   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1827   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1828   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1829   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1830   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1831   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1832   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1833   for (l=0;l<PETSC_MG_MAXLEVELS;l++) {
1834     char ename[32];
1835 
1836     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Squ l%02d",l);CHKERRQ(ierr);
1837     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][0]);CHKERRQ(ierr);
1838     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Gal l%02d",l);CHKERRQ(ierr);
1839     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][1]);CHKERRQ(ierr);
1840     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Opt l%02d",l);CHKERRQ(ierr);
1841     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][2]);CHKERRQ(ierr);
1842   }
1843   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1844   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1845   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1846   /* create timer stages */
1847 #if defined(GAMG_STAGES)
1848   {
1849     char     str[32];
1850     PetscInt lidx;
1851     sprintf(str,"MG Level %d (finest)",0);
1852     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1853     for (lidx=1; lidx<9; lidx++) {
1854       sprintf(str,"MG Level %d",(int)lidx);
1855       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1856     }
1857   }
1858 #endif
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 /*@C
1863  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1864     called from PetscFinalize() automatically.
1865 
1866  Level: developer
1867 
1868  .seealso: PetscFinalize()
1869 @*/
1870 PetscErrorCode PCGAMGFinalizePackage(void)
1871 {
1872   PetscErrorCode ierr;
1873 
1874   PetscFunctionBegin;
1875   PCGAMGPackageInitialized = PETSC_FALSE;
1876   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 /*@C
1881  PCGAMGRegister - Register a PCGAMG implementation.
1882 
1883  Input Parameters:
1884  + type - string that will be used as the name of the GAMG type.
1885  - create - function for creating the gamg context.
1886 
1887   Level: advanced
1888 
1889  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1890 @*/
1891 PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
1892 {
1893   PetscErrorCode ierr;
1894 
1895   PetscFunctionBegin;
1896   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1897   ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
1898   PetscFunctionReturn(0);
1899 }
1900 
1901