xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision 2de708cb9e330fedcb6a5a982256b519aeb72fbe)
1 /*
2  GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/
7 
8 #if defined(PETSC_HAVE_CUDA)
9   #include <cuda_runtime.h>
10 #endif
11 
12 #if defined(PETSC_HAVE_HIP)
13   #include <hip/hip_runtime.h>
14 #endif
15 
16 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
17 PetscLogEvent petsc_gamg_setup_matmat_events[PETSC_MG_MAXLEVELS][3];
18 PetscLogEvent PC_GAMGGraph_AGG;
19 PetscLogEvent PC_GAMGGraph_GEO;
20 PetscLogEvent PC_GAMGCoarsen_AGG;
21 PetscLogEvent PC_GAMGCoarsen_GEO;
22 PetscLogEvent PC_GAMGProlongator_AGG;
23 PetscLogEvent PC_GAMGProlongator_GEO;
24 PetscLogEvent PC_GAMGOptProlongator_AGG;
25 
26 /* #define GAMG_STAGES */
27 #if defined(GAMG_STAGES)
28 static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
29 #endif
30 
31 static PetscFunctionList GAMGList = NULL;
32 static PetscBool PCGAMGPackageInitialized;
33 
34 /* ----------------------------------------------------------------------------- */
35 PetscErrorCode PCReset_GAMG(PC pc)
36 {
37   PetscErrorCode ierr;
     PetscInt       level;
38   PC_MG          *mg      = (PC_MG*)pc->data;
39   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
40 
41   PetscFunctionBegin;
42   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
43   pc_gamg->data_sz = 0;
44   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
45   for (level = 0; level < PETSC_MG_MAXLEVELS ; level++) {
46     mg->min_eigen_DinvA[level] = 0;
47     mg->max_eigen_DinvA[level] = 0;
48   }
49   pc_gamg->emin = 0;
50   pc_gamg->emax = 0;
51   PetscFunctionReturn(0);
52 }
53 
54 /* -------------------------------------------------------------------------- */
55 /*
56    PCGAMGCreateLevel_GAMG: create the coarse operator with RAP; repartition and/or reduce the
57      number of active processors.
58 
59    Input Parameters:
60    . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
61           'pc_gamg->data_sz' are changed via repartitioning/reduction.
62    . Amat_fine - matrix on this fine (k) level
63    . cr_bs - coarse block size
64    In/Output Parameters:
65    . a_P_inout - prolongation operator to the next level (k-->k-1)
66    . a_nactive_proc - number of active procs
67    Output Parameter:
68    . a_Amat_crs - coarse matrix that is created (k-1)
69 */
70 
71 static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
72 {
73   PetscErrorCode  ierr;
74   PC_MG           *mg         = (PC_MG*)pc->data;
75   PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
76   Mat             Cmat,Pold=*a_P_inout;
77   MPI_Comm        comm;
78   PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
79   PetscInt        ncrs_eq,ncrs,f_bs;
80 
81   PetscFunctionBegin;
82   ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
83   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
84   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
85   ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
86   ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
87   ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
88   ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
89 
90   if (Pcolumnperm) *Pcolumnperm = NULL;
91 
92   /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
93   ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
94   if (pc_gamg->data_cell_rows>0) {
95     ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
96   } else {
97     PetscInt  bs;
98     ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
99     ncrs = ncrs_eq/bs;
100   }
101   /* determine the number of PEs to make active, 'new_size'; this reduction can be to any integer in [1,P] */
102   if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* a factor of 0 enables reduction to one process per device; TODO: do the same for HIP, etc. */
103 #if defined(PETSC_HAVE_CUDA)
104     PetscShmComm pshmcomm;
105     PetscMPIInt  locrank;
106     MPI_Comm     loccomm;
107     PetscInt     s_nnodes,r_nnodes, new_new_size;
108     cudaError_t  cerr;
109     int          devCount;
110     ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
111     ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
112     ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRMPI(ierr);
113     s_nnodes = !locrank;
114     ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
115     PetscCheckFalse(size%r_nnodes,PETSC_COMM_SELF,PETSC_ERR_PLIB,"number of processes np=%D not divisible by number of nodes nnodes=%D",size,r_nnodes);
116     devCount = 0;
117     cerr = cudaGetDeviceCount(&devCount);
118     cudaGetLastError(); /* Reset the last error */
119     if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
120       new_new_size = r_nnodes * devCount;
121       new_size = new_new_size;
122       ierr = PetscInfo(pc,"Fine grid with Cuda. %D nodes. Change new active set size %d --> %d (devCount=%d #nodes=%D)\n",r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
123     } else {
124       ierr = PetscInfo(pc,"Built with CUDA but no device found; using heuristics\n");CHKERRQ(ierr);
125       goto HEURISTIC;
126     }
127 #else
128     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
129 #endif
130   } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
131     PetscCheckFalse(nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level],PETSC_COMM_SELF,PETSC_ERR_PLIB,"number of active processes %D not divisible by reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
132     new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
133     ierr = PetscInfo(pc,"Manually setting reduction to %d active processes (%d/%D)\n",new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
134   } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
135     new_size = 1;
136     ierr = PetscInfo(pc,"Force coarsest grid reduction to %d active processes\n",new_size);CHKERRQ(ierr);
137   } else {
138     PetscInt ncrs_eq_glob;
139 #if defined(PETSC_HAVE_CUDA)
140     HEURISTIC:
141 #endif
142     ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
143     new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
144     if (!new_size) new_size = 1; /* unlikely, but possible? */
145     else if (new_size >= nactive) new_size = nactive; /* no change, rare */
146     ierr = PetscInfo(pc,"Coarse grid reduction from %d to %d active processes\n",nactive,new_size);CHKERRQ(ierr);
147   }
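  /* Worked example of the heuristic above (illustrative numbers, not recommendations): with
     ncrs_eq_glob = 10000 global equations and -pc_gamg_process_eq_limit 50,
     new_size = (PetscMPIInt)(10000./50. + 0.5) = 200; if only nactive = 64 processes are
     currently active, new_size is clamped back to 64 and no reduction occurs. */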
148   if (new_size==nactive) {
149     *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
150     if (new_size < size) {
151       /* odd case where multiple coarse grids are on one processor or no coarsening ... */
152       ierr = PetscInfo(pc,"reduced grid using same number of processors (%d) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
153       if (pc_gamg->cpu_pin_coarse_grids) {
154         ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
155         ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
156       }
157     }
158     /* we know that the grid structure can be reused in MatPtAP */
159   } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
160     PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
161     IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
162     nloc_old = ncrs_eq/cr_bs;
163     PetscCheckFalse(ncrs_eq % cr_bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
164     /* get new_size and rfactor */
165     if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
166       /* find factor */
167       if (new_size == 1) rfactor = size; /* don't modify */
168       else {
169         PetscReal best_fact = 0.;
170         jj = -1;
171         for (kk = 1 ; kk <= size ; kk++) {
172           if (!(size%kk)) { /* a candidate */
173             PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
174             if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
175             if (fact > best_fact) {
176               best_fact = fact; jj = kk;
177             }
178           }
179         }
180         if (jj != -1) rfactor = jj;
181         else rfactor = 1; /* a prime */
182         if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
183         else expand_factor = rfactor;
184       }
185       new_size = size/rfactor; /* make the new size one that evenly divides the total number of processes */
186       if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
187         *a_Amat_crs = Cmat;
188         ierr = PetscInfo(pc,"Finding factorable processor set stopped reduction: new_size=%d, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
189         PetscFunctionReturn(0);
190       }
191     }
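    /* Illustrative example of the factor search above (assumed numbers): with size = 12 and a
       target new_size = 5, the divisors kk of 12 give candidate active-set sizes size/kk of
       12, 6, 4, 3, 2, 1; the ratio test picks 6 (kk = 2, fact = 5/6), so rfactor = 2 and the
       adjusted new_size becomes 12/2 = 6. */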
192     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
193     /* make 'is_eq_newproc' */
194     ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
195     if (pc_gamg->repart) {
196       /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
197       Mat      adj;
198       ierr = PetscInfo(pc,"Repartition: size (active): %d --> %d, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
199       /* get 'adj' */
200       if (cr_bs == 1) {
201         ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
202       } else {
203         /* make a scalar matrix to partition (no Stokes here) */
204         Mat               tMat;
205         PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
206         const PetscScalar *vals;
207         const PetscInt    *idx;
208         PetscInt          *d_nnz, *o_nnz, M, N;
209         static PetscInt   llev = 0; /* ugly but just used for debugging */
210         MatType           mtype;
211 
212         ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
213         ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
214         ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
215         for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
216           ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
217           d_nnz[jj] = ncols/cr_bs;
218           o_nnz[jj] = ncols/cr_bs;
219           ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
220           if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
221           if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
222         }
223 
224         ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
225         ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
226         ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
227         ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
228         ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
229         ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
230         ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
231 
232         for (ii = Istart_crs; ii < Iend_crs; ii++) {
233           PetscInt dest_row = ii/cr_bs;
234           ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
235           for (jj = 0; jj < ncols; jj++) {
236             PetscInt    dest_col = idx[jj]/cr_bs;
237             PetscScalar v        = 1.0;
238             ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
239           }
240           ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
241         }
242         ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
243         ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
244 
245         if (llev++ == -1) {
246           PetscViewer viewer; char fname[32];
247           ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
248           ierr = PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
249           ierr = MatView(tMat, viewer);CHKERRQ(ierr);
250           ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
251         }
252         ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
253         ierr = MatDestroy(&tMat);CHKERRQ(ierr);
254       } /* create 'adj' */
255 
256       { /* partition: get newproc_idx */
257         char            prefix[256];
258         const char      *pcpre;
259         const PetscInt  *is_idx;
260         MatPartitioning mpart;
261         IS              proc_is;
262 
263         ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
264         ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
265         ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
266         ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
267         ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
268         ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
269         ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
270         ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
271         ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
272 
273         /* collect IS info */
274         ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
275         ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
276         for (kk = jj = 0 ; kk < nloc_old ; kk++) {
277           for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
278             newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
279           }
280         }
281         ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
282         ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
283       }
284       ierr = MatDestroy(&adj);CHKERRQ(ierr);
285 
286       ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
287       ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
288     } else { /* simple aggregation of parts -- 'is_eq_newproc' */
289       PetscInt targetPE;
290       PetscCheckFalse(new_size==nactive,PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
291       ierr = PetscInfo(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
292       targetPE = (rank/rfactor)*expand_factor;
293       ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
294     } /* end simple 'is_eq_newproc' */
295 
296     /*
297       Create an index set from the is_eq_newproc index set to indicate the mapping TO the new processes
298     */
299     ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
300     is_eq_num_prim = is_eq_num;
301     /*
302       Determine how many equations/vertices are assigned to each processor
303     */
304     ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
305     ncrs_eq_new = counts[rank];
306     ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
307     ncrs_new = ncrs_eq_new/cr_bs;
308 
309     ierr = PetscFree(counts);CHKERRQ(ierr);
310     /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxiliary data into some complex abstracted thing */
311     {
312       Vec            src_crd, dest_crd;
313       const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
314       VecScatter     vecscat;
315       PetscScalar    *array;
316       IS isscat;
317       /* move data (for primal equations only) */
318       /* Create a vector to contain the newly ordered element information */
319       ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
320       ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
321       ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
322       /*
323         There are 'ndata_rows*ndata_cols' data items per node (one can think of the vectors as having
324         a block size of 'node_data_sz').  Note, ISs are expanded into equation space by 'cr_bs'.
325       */
326       ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
327       ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
328       for (ii=0,jj=0; ii<ncrs; ii++) {
329         PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
330         for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
331       }
332       ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
333       ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
334       ierr = PetscFree(tidx);CHKERRQ(ierr);
335       /*
336         Create a vector to contain the original vertex information for each element
337       */
338       ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
339       for (jj=0; jj<ndata_cols; jj++) {
340         const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
341         for (ii=0; ii<ncrs; ii++) {
342           for (kk=0; kk<ndata_rows; kk++) {
343             PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
344             PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
345             ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
346           }
347         }
348       }
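      /* Index sketch for the copy above (illustrative): with ndata_rows = 1 and
         ndata_cols = 3 (say x,y,z coordinates), node ii keeps column jj at
         ix = ii + jj*stride0 in the column-major pc_gamg->data array, and at
         jx = 3*ii + jj in the node-interleaved scatter vector. */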
349       ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
350       ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
351       /*
352         Scatter the element vertex information (still in the original vertex ordering)
353         to the correct processor
354       */
355       ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
356       ierr = ISDestroy(&isscat);CHKERRQ(ierr);
357       ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
358       ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
359       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
360       ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
361       /*
362         Put the element vertex data into a new allocation of the gdata->ele
363       */
364       ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
365       ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);
366 
367       pc_gamg->data_sz = node_data_sz*ncrs_new;
368       strideNew        = ncrs_new*ndata_rows;
369 
370       ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
371       for (jj=0; jj<ndata_cols; jj++) {
372         for (ii=0; ii<ncrs_new; ii++) {
373           for (kk=0; kk<ndata_rows; kk++) {
374             PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
375             pc_gamg->data[ix] = PetscRealPart(array[jx]);
376           }
377         }
378       }
379       ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
380       ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
381     }
382     /* move A and P (columns) with new layout */
383     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
384     /*
385       Invert for MatCreateSubMatrix
386     */
387     ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
388     ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
389     ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
390     if (is_eq_num != is_eq_num_prim) {
391       ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
392     }
393     if (Pcolumnperm) {
394       ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
395       *Pcolumnperm = new_eq_indices;
396     }
397     ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
398     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
399     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
400     /* 'a_Amat_crs' output */
401     {
402       Mat       mat;
403       PetscBool flg;
404       ierr = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
405       ierr = MatGetOption(Cmat, MAT_SPD, &flg);CHKERRQ(ierr);
406       if (flg) {
407         ierr = MatSetOption(mat, MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
408       } else {
409         ierr = MatGetOption(Cmat, MAT_HERMITIAN, &flg);CHKERRQ(ierr);
410         if (flg) {
411           ierr = MatSetOption(mat, MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
412         } else {
413 #if !defined(PETSC_USE_COMPLEX)
414           ierr = MatGetOption(Cmat, MAT_SYMMETRIC, &flg);CHKERRQ(ierr);
415           if (flg) {
416             ierr = MatSetOption(mat, MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
417           }
418 #endif
419         }
420       }
421       *a_Amat_crs = mat;
422     }
423     ierr = MatDestroy(&Cmat);CHKERRQ(ierr);
424 
425     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
426     /* prolongator */
427     {
428       IS       findices;
429       PetscInt Istart,Iend;
430       Mat      Pnew;
431 
432       ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
433       ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
434       ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
435       ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
436       ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
437       ierr = ISDestroy(&findices);CHKERRQ(ierr);
438       ierr = MatSetOption(Pnew,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
439 
440       ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
441       ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);
442 
443       /* output - repartitioned */
444       *a_P_inout = Pnew;
445     }
446     ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);
447 
448     *a_nactive_proc = new_size; /* output */
449 
450     /* pinning on reduced grids, not a bad heuristic and optimization gets folded into process reduction optimization */
451     if (pc_gamg->cpu_pin_coarse_grids) {
452 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
453       static PetscInt llev = 2;
454       ierr = PetscInfo(pc,"Pinning level %D to the CPU\n",llev++);CHKERRQ(ierr);
455 #endif
456       ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
457       ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
458       if (1) { /* HACK: move this to MatBindCPU_MPIAIJXXX; lvec is created and needs to be pinned too, which is done in MatSetUpMultiply_MPIAIJ */
459         Mat         A = *a_Amat_crs, P = *a_P_inout;
460         PetscMPIInt size;
461         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
462         if (size > 1) {
463           Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
464           ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
465           ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
466         }
467       }
468     }
469     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
470   }
471   PetscFunctionReturn(0);
472 }
473 
474 PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
475 {
476   PetscErrorCode ierr;
477   const char     *prefix;
478   char           addp[32];
479   PC_MG          *mg      = (PC_MG*)a_pc->data;
480   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
481 
482   PetscFunctionBegin;
483   ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
484   ierr = PetscInfo(a_pc,"Square Graph on level %D\n",pc_gamg->current_level+1);CHKERRQ(ierr);
485   ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
486   ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
487   ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",pc_gamg->current_level);CHKERRQ(ierr);
488   ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
489   if ((*Gmat2)->structurally_symmetric) {
490     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AB);CHKERRQ(ierr);
491   } else {
492     ierr = MatSetOption(Gmat1,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
493     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
494   }
495   ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
496   ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
497   ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
498   ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
499   ierr = MatProductClear(*Gmat2);CHKERRQ(ierr);
500   /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
501   (*Gmat2)->assembled = PETSC_TRUE;
502   PetscFunctionReturn(0);
503 }
504 
505 /* -------------------------------------------------------------------------- */
506 /*
507    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
508                     by setting data structures and options.
509 
510    Input Parameter:
511 .  pc - the preconditioner context
512 
513 */
514 PetscErrorCode PCSetUp_GAMG(PC pc)
515 {
516   PetscErrorCode ierr;
517   PC_MG          *mg      = (PC_MG*)pc->data;
518   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
519   Mat            Pmat     = pc->pmat;
520   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
521   MPI_Comm       comm;
522   PetscMPIInt    rank,size,nactivepe;
523   Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
524   IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
525   PetscLogDouble nnz0=0.,nnztot=0.;
526   MatInfo        info;
527   PetscBool      is_last = PETSC_FALSE;
528 
529   PetscFunctionBegin;
530   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
531   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
532   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
533 
534   if (pc->setupcalled) {
535     if (!pc_gamg->reuse_prol || pc->flag == DIFFERENT_NONZERO_PATTERN) {
536       /* reset everything */
537       ierr = PCReset_MG(pc);CHKERRQ(ierr);
538       pc->setupcalled = 0;
539     } else {
540       PC_MG_Levels **mglevels = mg->levels;
541       /* just do Galerkin grids */
542       Mat          B,dA,dB;
543 
544       if (pc_gamg->Nlevels > 1) {
545         PetscInt gl;
546         /* currently only handle case where mat and pmat are the same on coarser levels */
547         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
548         /* (re)set to get dirty flag */
549         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
550 
551         for (level=pc_gamg->Nlevels-2,gl=0; level>=0; level--,gl++) {
552           MatReuse reuse = MAT_INITIAL_MATRIX;
553 
554           /* the matrix structure can change from repartitioning or process reduction, but we do not know here whether process reduction happened; should fix */
555           ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
556           if (B->product) {
557             if (B->product->A == dB && B->product->B == mglevels[level+1]->interpolate) {
558               reuse = MAT_REUSE_MATRIX;
559             }
560           }
561           if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr); }
562           if (reuse == MAT_REUSE_MATRIX) {
563             ierr = PetscInfo(pc,"RAP after first solve, reuse matrix level %D\n",level);CHKERRQ(ierr);
564           } else {
565             ierr = PetscInfo(pc,"RAP after first solve, new matrix level %D\n",level);CHKERRQ(ierr);
566           }
567           ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
568           ierr = MatPtAP(dB,mglevels[level+1]->interpolate,reuse,PETSC_DEFAULT,&B);CHKERRQ(ierr);
569           ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
570           if (reuse == MAT_INITIAL_MATRIX) mglevels[level]->A = B;
571           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
572           dB   = B;
573         }
574       }
575 
576       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
577       PetscFunctionReturn(0);
578     }
579   }
580 
581   if (!pc_gamg->data) {
582     if (pc_gamg->orig_data) {
583       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
584       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
585 
586       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
587       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
588       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
589 
590       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
591       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
592     } else {
593       PetscCheckFalse(!pc_gamg->ops->createdefaultdata,comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
594       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
595     }
596   }
597 
598   /* cache original data for reuse */
599   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
600     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
601     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
602     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
603     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
604   }
605 
606   /* get basic dims */
607   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
608   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
609 
610   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
611   nnz0   = info.nz_used;
612   nnztot = info.nz_used;
613   ierr = PetscInfo(pc,"level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
614 
615   /* Get A_i and R_i */
616   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
617     pc_gamg->current_level = level;
618     PetscCheckFalse(level >= PETSC_MG_MAXLEVELS,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
619     level1 = level + 1;
620     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
621 #if defined(GAMG_STAGES)
622     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
623 #endif
624     { /* construct prolongator */
625       Mat              Gmat;
626       PetscCoarsenData *agg_lists;
627       Mat              Prol11;
628 
629       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
630       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
631       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
632 
633       /* could have failed to create new level */
634       if (Prol11) {
635         const char *prefix;
636         char       addp[32];
637 
638         /* get new block size of coarse matrices */
639         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
640 
641         if (pc_gamg->ops->optprolongator) {
642           /* smooth */
643           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
644         }
645 
646         if (pc_gamg->use_aggs_in_asm) {
647           PetscInt bs;
648           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
649           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
650         }
651 
652         ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
653         ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
654         ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
655         ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
656         /* Always generate the transpose with CUDA
657            Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
658         ierr = MatSetOption(Prol11,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
659         ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
660         Parr[level1] = Prol11;
661       } else Parr[level1] = NULL; /* failed to coarsen */
662 
663       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
664       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
665     } /* construct prolongator scope */
666     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
667     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
668     if (!Parr[level1]) { /* failed to coarsen */
669       ierr = PetscInfo(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
670 #if defined(GAMG_STAGES)
671       ierr = PetscLogStagePop();CHKERRQ(ierr);
672 #endif
673       break;
674     }
675     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
676     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is the next M, the loop test variable */
677     PetscCheckFalse(is_last,PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_last should not be set yet");
678     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
679     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
680     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
681 
682     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
683     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is the loop test variable */
684     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
685     nnztot += info.nz_used;
686     ierr = PetscInfo(pc,"%D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
687 
688 #if defined(GAMG_STAGES)
689     ierr = PetscLogStagePop();CHKERRQ(ierr);
690 #endif
691     /* stop if one node or one proc -- could pull back for singular problems */
692     if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
693       ierr =  PetscInfo(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
694       level++;
695       break;
696     }
697   } /* levels */
698   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
699 
700   ierr = PetscInfo(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
701   pc_gamg->Nlevels = level + 1;
702   fine_level       = level;
703   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
704 
705   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
706     PetscErrorCode (*savesetfromoptions[PETSC_MG_MAXLEVELS])(PetscOptionItems*,KSP);
707 
708     /* set default smoothers & set operators */
709     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
710       KSP smoother;
711       PC  subpc;
712 
713       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
714       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
715 
716       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
717       /* set ops */
718       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
719       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
720 
721       /* set defaults */
722       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
723 
724       /* set blocks for ASM smoother that uses the 'aggregates' */
725       if (pc_gamg->use_aggs_in_asm) {
726         PetscInt sz;
727         IS       *iss;
728 
729         sz   = nASMBlocksArr[level];
730         iss  = ASMLocalIDsArr[level];
731         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
732         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
733         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
734         if (!sz) {
735           IS       is;
736           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
737           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
738           ierr = ISDestroy(&is);CHKERRQ(ierr);
739         } else {
740           PetscInt kk;
741           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
742           for (kk=0; kk<sz; kk++) {
743             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
744           }
745           ierr = PetscFree(iss);CHKERRQ(ierr);
746         }
747         ASMLocalIDsArr[level] = NULL;
748         nASMBlocksArr[level]  = 0;
749       } else {
750         ierr = PCSetType(subpc, PCJACOBI);CHKERRQ(ierr);
751       }
752     }
753     {
754       /* coarse grid */
755       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
756       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
757 
758       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
759       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
760       if (!pc_gamg->use_parallel_coarse_grid_solver) {
761         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
762         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
763         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
764         ierr = PCSetUp(subpc);CHKERRQ(ierr);
765         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
766         PetscCheckFalse(ii != 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
767         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
768         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
769         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
770         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
771         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
772       }
773     }
774 
775     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
776     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
777     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
778     ierr = PetscOptionsEnd();CHKERRQ(ierr);
779     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
780 
781     /* setup cheby eigen estimates from SA */
782     if (pc_gamg->use_sa_esteig) {
783       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
784         KSP       smoother;
785         PetscBool ischeb;
786 
787         savesetfromoptions[level] = NULL;
788         ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
789         ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
790         if (ischeb) {
791           KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;
792 
793           // The command line will override these settings because KSPSetFromOptions is called in PCSetUp_MG
794           if (mg->max_eigen_DinvA[level] > 0 && cheb->emax == 0.) {
795             PC        subpc;
796             PetscBool isjac;
797             ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
798             ierr = PetscObjectTypeCompare((PetscObject)subpc,PCJACOBI,&isjac);CHKERRQ(ierr);
799             if (isjac || pc_gamg->use_sa_esteig_set) {
800               // SA uses Jacobi for P; we use SA estimates if the smoother is also Jacobi or if the user explicitly requested it.
801               // TODO: This should test whether it's the same Jacobi variant (DIAG, ROWSUM, etc.)
802               PetscReal emax,emin;
803 
804               emin = mg->min_eigen_DinvA[level];
805               emax = mg->max_eigen_DinvA[level];
806               ierr = PetscInfo(pc,"PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
807               cheb->emin_computed = emin;
808               cheb->emax_computed = emax;
809               ierr = KSPChebyshevSetEigenvalues(smoother, cheb->tform[2]*emin + cheb->tform[3]*emax, cheb->tform[0]*emin + cheb->tform[1]*emax);CHKERRQ(ierr);
810 
811               /* We have set the eigenvalues and consumed the transformation values;
812                  prevent the eigenvalues from being flagged for recomputation in PCSetUp_MG
813                  below, where setfromoptions will be called again */
814               savesetfromoptions[level] = smoother->ops->setfromoptions;
815               smoother->ops->setfromoptions = NULL;
816             }
817           }
818         }
819       }
820     }
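    /* Illustrative arithmetic for the transform above: assuming PETSc's default Chebyshev
       eigen-estimate transform 0,0.1,0,1.1 and an SA estimate emax = 2.0, the call above sets
       the smoother bounds to emin = 0.1*2.0 = 0.2 and emax = 1.1*2.0 = 2.2. */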
821 
822     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
823 
824     /* restore Chebyshev smoother for next calls */
825     if (pc_gamg->use_sa_esteig) {
826       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
827         if (savesetfromoptions[level]) {
828           KSP smoother;
829           ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
830           smoother->ops->setfromoptions = savesetfromoptions[level];
831         }
832       }
833     }
834 
835     /* clean up */
836     for (level=1; level<pc_gamg->Nlevels; level++) {
837       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
838       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
839     }
840   } else {
841     KSP smoother;
842 
843     ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
844     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
845     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
846     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
847     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
848   }
849   PetscFunctionReturn(0);
850 }
851 
852 /* ------------------------------------------------------------------------- */
853 /*
854  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
855    that was created with PCCreate_GAMG().
856 
857    Input Parameter:
858 .  pc - the preconditioner context
859 
860    Application Interface Routine: PCDestroy()
861 */
862 PetscErrorCode PCDestroy_GAMG(PC pc)
863 {
864   PetscErrorCode ierr;
865   PC_MG          *mg     = (PC_MG*)pc->data;
866   PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;
867 
868   PetscFunctionBegin;
869   ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
870   if (pc_gamg->ops->destroy) {
871     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
874   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
875   ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
876   ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
877   PetscFunctionReturn(0);
878 }
879 
880 /*@
881    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
882 
883    Logically Collective on PC
884 
885    Input Parameters:
886 +  pc - the preconditioner context
887 -  n - the number of equations
888 
889    Options Database Key:
890 .  -pc_gamg_process_eq_limit <limit>
891 
892    Notes:
893     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
894           that has degrees of freedom
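
   Example (a usage sketch; error checking omitted):
.vb
   PCSetType(pc,PCGAMG);
   PCGAMGSetProcEqLim(pc,1000);  /* keep roughly 1000 equations per active process on coarse grids */
.ve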
895 
896    Level: intermediate
897 
898 .seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
899 @*/
900 PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
901 {
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
906   ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
907   PetscFunctionReturn(0);
908 }
909 
910 static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
911 {
912   PC_MG   *mg      = (PC_MG*)pc->data;
913   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
914 
915   PetscFunctionBegin;
916   if (n>0) pc_gamg->min_eq_proc = n;
917   PetscFunctionReturn(0);
918 }
919 
920 /*@
921    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
922 
923    Collective on PC
924 
925    Input Parameters:
926 +  pc - the preconditioner context
927 -  n - maximum number of equations to aim for
928 
929    Options Database Key:
930 .  -pc_gamg_coarse_eq_limit <limit>
931 
932    Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
933      has fewer than 1000 unknowns.
934 
935    Level: intermediate
936 
937 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
938 @*/
939 PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
940 {
941   PetscErrorCode ierr;
942 
943   PetscFunctionBegin;
944   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
945   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
950 {
951   PC_MG   *mg      = (PC_MG*)pc->data;
952   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
953 
954   PetscFunctionBegin;
955   if (n>0) pc_gamg->coarse_eq_limit = n;
956   PetscFunctionReturn(0);
957 }
958 
959 /*@
960    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
961 
962    Collective on PC
963 
964    Input Parameters:
965 +  pc - the preconditioner context
966 -  n - PETSC_TRUE or PETSC_FALSE
967 
968    Options Database Key:
969 .  -pc_gamg_repartition <true,false>
970 
971    Notes:
972     this will generally improve the load balancing of the work on each level
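
   Example (a usage sketch; error checking omitted):
.vb
   PCSetType(pc,PCGAMG);
   PCGAMGSetRepartition(pc,PETSC_TRUE);  /* repartition coarse grids for better load balance */
.ve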
973 
974    Level: intermediate
975 
976 .seealso: ()
977 @*/
978 PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
979 {
980   PetscErrorCode ierr;
981 
982   PetscFunctionBegin;
983   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
984   ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
985   PetscFunctionReturn(0);
986 }
987 
988 static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
989 {
990   PC_MG   *mg      = (PC_MG*)pc->data;
991   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
992 
993   PetscFunctionBegin;
994   pc_gamg->repart = n;
995   PetscFunctionReturn(0);
996 }
997 
998 /*@
999    PCGAMGSetEstEigKSPMaxIt - Set number of KSP iterations in eigen estimator (for Cheby)
1000 
1001    Collective on PC
1002 
1003    Input Parameters:
1004 +  pc - the preconditioner context
1005 -  n - maximum number of iterations
1006 
1007    Options Database Key:
1008 .  -pc_gamg_esteig_ksp_max_it <its>
1009 
1010    Notes:
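   A usage sketch configuring the eigen estimator (KSPCG is appropriate for SPD operators):
.vb
   PCGAMGSetEstEigKSPType(pc,KSPCG);
   PCGAMGSetEstEigKSPMaxIt(pc,10);
.ve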
1011 
1012    Level: intermediate
1013 
1014 .seealso: ()
1015 @*/
1016 PetscErrorCode PCGAMGSetEstEigKSPMaxIt(PC pc, PetscInt n)
1017 {
1018   PetscErrorCode ierr;
1019 
1020   PetscFunctionBegin;
1021   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1022   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPMaxIt_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 static PetscErrorCode PCGAMGSetEstEigKSPMaxIt_GAMG(PC pc, PetscInt n)
1027 {
1028   PC_MG   *mg      = (PC_MG*)pc->data;
1029   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1030 
1031   PetscFunctionBegin;
1032   pc_gamg->esteig_max_it = n;
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 /*@
1037    PCGAMGSetUseSAEstEig - Use eigen estimate from smoothed aggregation for Chebyshev smoother
1038 
1039    Collective on PC
1040 
1041    Input Parameters:
1042 +  pc - the preconditioner context
1043 -  n - PETSC_TRUE to use the eigen estimate from smoothed aggregation
1044 
1045    Options Database Key:
1046 .  -pc_gamg_use_sa_esteig <true,false>
1047 
1048    Notes:
1049    Smoothed aggregation constructs the smoothed prolongator $P = (I - \omega D^{-1} A) T$ where $T$ is the tentative prolongator and $D$ is the diagonal of $A$.
1050    Eigenvalue estimates (based on a few CG or GMRES iterations) are computed to choose $\omega$ so that this is a stable smoothing operation.
1051    If Chebyshev with Jacobi (diagonal) preconditioning is used for smoothing, then the eigenvalue estimates can be reused.
1052    As of PETSc 3.17 this is used by default when the smoother uses Jacobi preconditioning; otherwise it is off by default.
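
   Example (a usage sketch):
.vb
   PCGAMGSetUseSAEstEig(pc,PETSC_TRUE);  /* reuse the SA eigen estimate for the Chebyshev bounds */
.ve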
1053 
1054    Level: advanced
1055 
1056 .seealso: PCGAMGSetEstEigKSPType(), KSPChebyshevSetEigenvalues(), KSPChebyshevEstEigSet()
1057 @*/
1058 PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
1059 {
1060   PetscErrorCode ierr;
1061 
1062   PetscFunctionBegin;
1063   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1064   ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
1069 {
1070   PC_MG   *mg      = (PC_MG*)pc->data;
1071   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1072 
1073   PetscFunctionBegin;
1074   pc_gamg->use_sa_esteig = n;
1075   pc_gamg->use_sa_esteig_set = PETSC_TRUE;
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 /*@C
1080    PCGAMGSetEstEigKSPType - Set type of KSP in eigen estimator (for Cheby)
1081 
1082    Collective on PC
1083 
1084    Input Parameters:
1085 +  pc - the preconditioner context
1086 -  t - the name of the KSP type to use
1087 
1088    Options Database Key:
1089 .  -pc_gamg_esteig_ksp_type <type>
1090 
1091    Notes:
1092 
1093    Level: intermediate
1094 
1095 .seealso: ()
1096 @*/
1097 PetscErrorCode PCGAMGSetEstEigKSPType(PC pc, char t[])
1098 {
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1103   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPType_C",(PC,char[]),(pc,t));CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 static PetscErrorCode PCGAMGSetEstEigKSPType_GAMG(PC pc, char t[])
1108 {
1109   PetscErrorCode ierr;
1110   PC_MG   *mg      = (PC_MG*)pc->data;
1111   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1112 
1113   PetscFunctionBegin;
1114   ierr = PetscStrcpy(pc_gamg->esteig_type,t);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*@
1119    PCGAMGSetEigenvalues - Set the min and max eigenvalue estimates to be used by GAMG, rather than having them computed
1120 
1121    Collective on PC
1122 
1123    Input Parameters:
1124 +  pc - the preconditioner context
1125 .  emax - max eigenvalue
1126 -  emin - min eigenvalue
1127 
1128    Options Database Key:
1129 .  -pc_gamg_eigenvalues
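
   Example (a sketch; the values are illustrative, not recommendations):
.vb
   PCGAMGSetEigenvalues(pc,2.0,0.05);  /* emax = 2.0, emin = 0.05, both positive */
.ve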
1130 
1131    Level: intermediate
1132 
1133 .seealso: PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPType()
1134 @*/
1135 PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
1136 {
1137   PetscErrorCode ierr;
1138 
1139   PetscFunctionBegin;
1140   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1141   ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
1146 {
1147   PC_MG          *mg      = (PC_MG*)pc->data;
1148   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1149 
1150   PetscFunctionBegin;
1151   PetscCheckFalse(emax <= emin,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
1152   PetscCheckFalse(emax*emin <= 0.0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
1153   pc_gamg->emax = emax;
1154   pc_gamg->emin = emin;
1155 
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 /*@
1160    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
1161 
1162    Collective on PC
1163 
1164    Input Parameters:
1165 +  pc - the preconditioner context
1166 -  n - PETSC_TRUE or PETSC_FALSE
1167 
1168    Options Database Key:
1169 .  -pc_gamg_reuse_interpolation <true,false>
1170 
1171    Level: intermediate
1172 
1173    Notes:
1174     this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
1175           rebuilding the preconditioner more quickly.
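
   Example (a sketch for repeated solves where the nonzero pattern is fixed but the entries change):
.vb
   PCGAMGSetReuseInterpolation(pc,PETSC_TRUE);
.ve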
1176 
1177 .seealso: ()
1178 @*/
1179 PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
1180 {
1181   PetscErrorCode ierr;
1182 
1183   PetscFunctionBegin;
1184   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1185   ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
1190 {
1191   PC_MG   *mg      = (PC_MG*)pc->data;
1192   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1193 
1194   PetscFunctionBegin;
1195   pc_gamg->reuse_prol = n;
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 /*@
1200    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
1201 
1202    Collective on PC
1203 
1204    Input Parameters:
1205 +  pc - the preconditioner context
1206 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
1207 
1208    Options Database Key:
1209 .  -pc_gamg_asm_use_agg
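
   Example (a usage sketch):
.vb
   PCGAMGASMSetUseAggs(pc,PETSC_TRUE);  /* PCASM smoothers use the aggregates as subdomains */
.ve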
1210 
1211    Level: intermediate
1212 
1213 .seealso: ()
1214 @*/
1215 PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
1216 {
1217   PetscErrorCode ierr;
1218 
1219   PetscFunctionBegin;
1220   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1221   ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
1226 {
1227   PC_MG   *mg      = (PC_MG*)pc->data;
1228   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1229 
1230   PetscFunctionBegin;
1231   pc_gamg->use_aggs_in_asm = flg;
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 /*@
1236    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
1237 
1238    Collective on PC
1239 
1240    Input Parameters:
1241 +  pc - the preconditioner context
1242 -  flg - PETSC_TRUE to not force coarse grid onto one processor
1243 
1244    Options Database Key:
1245 .  -pc_gamg_use_parallel_coarse_grid_solver
1246 
1247    Level: intermediate
1248 
1249 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
1250 @*/
1251 PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
1252 {
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1257   ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
1262 {
1263   PC_MG   *mg      = (PC_MG*)pc->data;
1264   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1265 
1266   PetscFunctionBegin;
1267   pc_gamg->use_parallel_coarse_grid_solver = flg;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 /*@
1272    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
1273 
1274    Collective on PC
1275 
1276    Input Parameters:
1277 +  pc - the preconditioner context
1278 -  flg - PETSC_TRUE to pin coarse grids to CPU
1279 
1280    Options Database Key:
1281 .  -pc_gamg_cpu_pin_coarse_grids
1282 
1283    Level: intermediate
1284 
1285 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
1286 @*/
1287 PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
1288 {
1289   PetscErrorCode ierr;
1290 
1291   PetscFunctionBegin;
1292   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1293   ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
1298 {
1299   PC_MG   *mg      = (PC_MG*)pc->data;
1300   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1301 
1302   PetscFunctionBegin;
1303   pc_gamg->cpu_pin_coarse_grids = flg;
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 /*@
1308    PCGAMGSetCoarseGridLayoutType - set the layout of reduced grids: natural (compact) process ordering or spread across the machine
1309 
1310    Collective on PC
1311 
1312    Input Parameters:
1313 +  pc - the preconditioner context
1314 -  flg - the layout type, PCGAMG_LAYOUT_COMPACT or PCGAMG_LAYOUT_SPREAD
1315 
1316    Options Database Key:
1317 .  -pc_gamg_coarse_grid_layout_type
1318 
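   Notes:
   A minimal call sketch, assuming pc is a PCGAMG preconditioner:
.vb
   ierr = PCGAMGSetCoarseGridLayoutType(pc,PCGAMG_LAYOUT_COMPACT);CHKERRQ(ierr);
.ve
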
1319    Level: intermediate
1320 
1321 .seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
1322 @*/
1323 PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
1324 {
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1329   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
1334 {
1335   PC_MG   *mg      = (PC_MG*)pc->data;
1336   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1337 
1338   PetscFunctionBegin;
1339   pc_gamg->layout_type = flg;
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 /*@
1344    PCGAMGSetNlevels - Sets the maximum number of levels PCGAMG will use
1345 
1346    Not collective on PC
1347 
1348    Input Parameters:
1349 +  pc - the preconditioner
1350 -  n - the maximum number of levels to use
1351 
1352    Options Database Key:
1353 .  -pc_mg_levels
1354 
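   Notes:
   An options sketch limiting GAMG to four levels:
.vb
   -pc_type gamg -pc_mg_levels 4
.ve
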
1355    Level: intermediate
1356 
1357 .seealso: PCGAMG, PCGAMGSetThreshold()
1358 @*/
1359 PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
1360 {
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
1364   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1365   ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1370 {
1371   PC_MG   *mg      = (PC_MG*)pc->data;
1372   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1373 
1374   PetscFunctionBegin;
1375   pc_gamg->Nlevels = n;
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 /*@
1380    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1381 
1382    Not collective on PC
1383 
1384    Input Parameters:
1385 +  pc - the preconditioner context
1386 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1387 -  n - number of threshold values provided in array
1388 
1389    Options Database Key:
1390 .  -pc_gamg_threshold <threshold>
1391 
1392    Notes:
1393     Increasing the threshold decreases the rate of coarsening. Conversely, reducing the threshold increases the rate of coarsening (more aggressive coarsening) and thereby reduces the complexity of the coarse grids, but generally results in slower solver convergence rates. Reducing coarse grid complexity also reduces the cost of Galerkin coarse grid construction considerably.
1394     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, thus reducing the coupling in the graph and producing a different (perhaps better) coarse set of points.
1395 
1396     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1397     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1398     If n is greater than the total number of levels, the excess entries in threshold will not be used.
1399 
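    Example, a sketch assuming pc is a PCGAMG preconditioner; the scale is set first so the remaining levels are filled in correctly:
.vb
    PetscReal v[2] = {0.05, 0.01};
    ierr = PCGAMGSetThresholdScale(pc,0.5);CHKERRQ(ierr);
    ierr = PCGAMGSetThreshold(pc,v,2);CHKERRQ(ierr); /* levels 0,1 use 0.05,0.01; levels 2,3,... use 0.005,0.0025,... */
.ve
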
1400    Level: intermediate
1401 
1402 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1403 @*/
1404 PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
1405 {
1406   PetscErrorCode ierr;
1407 
1408   PetscFunctionBegin;
1409   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1410   if (n) PetscValidRealPointer(v,2);
1411   ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1416 {
1417   PC_MG   *mg      = (PC_MG*)pc->data;
1418   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1419   PetscInt i;
1420   PetscFunctionBegin;
1421   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1422   for (; i>0 && i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale; /* the i>0 guard avoids reading threshold[-1] when called with n <= 0 */
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 /*@
1427    PCGAMGSetRankReductionFactors - Set a manual schedule for the reduction of active processes on coarse grids
1428 
1429    Collective on PC
1430 
1431    Input Parameters:
1432 +  pc - the preconditioner context
1433 .  v - array of reduction factors; a value of 0 for the first entry forces a reduction to one process/device on the first level when running with CUDA
1434 -  n - number of values provided in array
1435 
1436    Options Database Key:
1437 .  -pc_gamg_rank_reduction_factors <factors>
1438 
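   Notes:
   An illustrative call sketch; the factor values are arbitrary:
.vb
   PetscInt f[2] = {4, 2}; /* reduce active ranks by 4x at the first coarsening, then by 2x */
   ierr = PCGAMGSetRankReductionFactors(pc,f,2);CHKERRQ(ierr);
.ve
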
1439    Level: intermediate
1440 
1441 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1442 @*/
1443 PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
1444 {
1445   PetscErrorCode ierr;
1446 
1447   PetscFunctionBegin;
1448   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1449   if (n) PetscValidIntPointer(v,2);
1450   ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
1455 {
1456   PC_MG   *mg      = (PC_MG*)pc->data;
1457   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1458   PetscInt i;
1459   PetscFunctionBegin;
1460   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
1461   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* -1 marks levels with no user-specified factor; internal heuristics apply there */
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 /*@
1466    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1467 
1468    Not collective on PC
1469 
1470    Input Parameters:
1471 +  pc - the preconditioner context
1472 -  v - the threshold reduction factor, usually < 1.0
1473 
1474    Options Database Key:
1475 .  -pc_gamg_threshold_scale <v>
1476 
1477    Notes:
1478    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1479    This scaling is used for each subsequent coarsening, but must be called before PCGAMGSetThreshold().
1480 
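   For example, the following yields thresholds 0.01, 0.005, 0.0025, ... on successively coarser levels:
.vb
   -pc_gamg_threshold 0.01 -pc_gamg_threshold_scale 0.5
.ve
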
1481    Level: advanced
1482 
1483 .seealso: PCGAMGSetThreshold()
1484 @*/
1485 PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
1486 {
1487   PetscErrorCode ierr;
1488 
1489   PetscFunctionBegin;
1490   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1491   ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1496 {
1497   PC_MG   *mg      = (PC_MG*)pc->data;
1498   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1499   PetscFunctionBegin;
1500   pc_gamg->threshold_scale = v;
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 /*@C
1505    PCGAMGSetType - Set the type of algebraic multigrid method GAMG uses
1506 
1507    Collective on PC
1508 
1509    Input Parameters:
1510 +  pc - the preconditioner context
1511 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1512 
1513    Options Database Key:
1514 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1515 
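   Notes:
   A minimal call sketch, assuming pc is a PCGAMG preconditioner:
.vb
   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
.ve
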
1516    Level: intermediate
1517 
1518 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1519 @*/
1520 PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
1521 {
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1526   ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 /*@C
1531    PCGAMGGetType - Get the type of algebraic multigrid method GAMG uses
1532 
1533    Collective on PC
1534 
1535    Input Parameter:
1536 .  pc - the preconditioner context
1537 
1538    Output Parameter:
1539 .  type - the type of algorithm used
1540 
1541    Level: intermediate
1542 
1543 .seealso: PCGAMGSetType(), PCGAMGType
1544 @*/
1545 PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
1546 {
1547   PetscErrorCode ierr;
1548 
1549   PetscFunctionBegin;
1550   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1551   ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1556 {
1557   PC_MG          *mg      = (PC_MG*)pc->data;
1558   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1559 
1560   PetscFunctionBegin;
1561   *type = pc_gamg->type;
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
1566 {
1567   PetscErrorCode ierr,(*r)(PC);
1568   PC_MG          *mg      = (PC_MG*)pc->data;
1569   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1570 
1571   PetscFunctionBegin;
1572   pc_gamg->type = type;
1573   ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
1574   PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
1575   if (pc_gamg->ops->destroy) {
1576     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
1577     ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
1578     pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1579     /* cleaning up common data in pc_gamg - this should disappear someday */
1580     pc_gamg->data_cell_cols = 0;
1581     pc_gamg->data_cell_rows = 0;
1582     pc_gamg->orig_data_cell_cols = 0;
1583     pc_gamg->orig_data_cell_rows = 0;
1584     ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
1585     pc_gamg->data_sz = 0;
1586   }
1587   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
1588   ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
1589   ierr = (*r)(pc);CHKERRQ(ierr);
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1594 {
1595   PetscErrorCode ierr;
       PetscInt       i;
1596   PC_MG          *mg      = (PC_MG*)pc->data;
1597   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1598   PetscReal       gc=0, oc=0;
1599 
1600   PetscFunctionBegin;
1601   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1602   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1603   for (i=0;i<mg->nlevels; i++) {
1604     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1605   }
1606   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1607   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1608   if (pc_gamg->use_aggs_in_asm) {
1609     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1610   }
1611   if (pc_gamg->use_parallel_coarse_grid_solver) {
1612     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1613   }
1614 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1615   if (pc_gamg->cpu_pin_coarse_grids) {
1616     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1617   }
1618 #endif
1619   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1620   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1621   /* } else { */
1622   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1623   /* } */
1624   if (pc_gamg->ops->view) {
1625     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1626   }
1627   ierr = PCMGGetGridComplexity(pc,&gc,&oc);CHKERRQ(ierr);
1628   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g    operator = %g\n",gc,oc);CHKERRQ(ierr);
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1633 {
1634   PetscErrorCode ierr;
1635   PC_MG          *mg      = (PC_MG*)pc->data;
1636   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1637   PetscBool      flag;
1638   MPI_Comm       comm;
1639   char           prefix[256],tname[32];
1640   PetscInt       i,n;
1641   const char     *pcpre;
1642   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1643   PetscFunctionBegin;
1644   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1645   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1646   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1647   if (flag) {
1648     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1649   }
1650   ierr = PetscOptionsFList("-pc_gamg_esteig_ksp_type","Krylov method for eigen estimator","PCGAMGSetEstEigKSPType",KSPList,pc_gamg->esteig_type,tname,sizeof(tname),&flag);CHKERRQ(ierr);
1651   if (flag) {
1652     ierr = PCGAMGSetEstEigKSPType(pc,tname);CHKERRQ(ierr);
1653   }
1654   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1655   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use eigen estimate from Smoothed aggregation for smoother","PCGAMGSetUseSAEstEig",pc_gamg->use_sa_esteig,&pc_gamg->use_sa_esteig,&flag);CHKERRQ(ierr);
1656   if (flag) pc_gamg->use_sa_esteig_set = PETSC_TRUE;
1657   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1658   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1659   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1660   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1661   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1662   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1663   ierr = PetscOptionsInt("-pc_gamg_esteig_ksp_max_it","Number of iterations of eigen estimator","PCGAMGSetEstEigKSPMaxIt",pc_gamg->esteig_max_it,&pc_gamg->esteig_max_it,NULL);CHKERRQ(ierr);
1664   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1665   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1666   n = PETSC_MG_MAXLEVELS;
1667   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1668   if (!flag || n < PETSC_MG_MAXLEVELS) {
1669     if (!flag || !n) n = 1; /* the option may be given with no values (n==0), which would index threshold[-1] below */
1670     i = n;
1671     do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_MG_MAXLEVELS);
1672   }
1673   n = PETSC_MG_MAXLEVELS;
1674   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (a first value of 0 forces a reduction to one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1675   if (!flag) i = 0;
1676   else i = n;
1677   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* a for-loop (not do-while) avoids an out-of-bounds write when n == PETSC_MG_MAXLEVELS */
1678   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1679   {
1680     PetscReal eminmax[2] = {0., 0.};
1681     n = 2;
1682     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1683     if (flag) {
1684       PetscCheckFalse(n != 2,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1685       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr);
1686     }
1687   }
1688   /* set options for subtype */
1689   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1690 
1691   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1692   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1693   ierr = PetscOptionsTail();CHKERRQ(ierr);
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 /* -------------------------------------------------------------------------- */
1698 /*MC
1699      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1700 
1701    Options Database Keys:
1702 +   -pc_gamg_type <type> - one of agg, geo, or classical
1703 .   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
1704 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
1705 .   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1706 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1707                                         equations on each process that has degrees of freedom
1708 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1709 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1710 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1711 
1712    Options Database Keys for default Aggregation:
1713 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1714 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1715 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1716 
1717    Multigrid options:
1718 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1719 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
1720 .  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kaskade
1721 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1722 
1723   Notes:
1724     In order to obtain good performance for PCGAMG for vector valued problems you must
1725        call MatSetBlockSize() to indicate the number of degrees of freedom per grid point, and
1726        call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator.
1727        See the Users Manual Chapter 4 for more details.
1728 
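    An illustrative setup sketch; A, ksp, pc, nloc, and coords are assumed to be declared and created elsewhere:
.vb
    ierr = MatSetBlockSize(A,3);CHKERRQ(ierr);               /* 3 degrees of freedom per grid point */
    ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
    ierr = PCSetType(pc,PCGAMG);CHKERRQ(ierr);
    ierr = PCSetCoordinates(pc,3,nloc,coords);CHKERRQ(ierr); /* builds a near null space suitable for elasticity */
.ve
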
1729   Level: intermediate
1730 
1731 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1732            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetEstEigKSPType()
1733 M*/
1734 
1735 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1736 {
1737   PetscErrorCode ierr;
       PetscInt       i;
1738   PC_GAMG        *pc_gamg;
1739   PC_MG          *mg;
1740 
1741   PetscFunctionBegin;
1742    /* register AMG type */
1743   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1744 
1745   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1746   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1747   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1748 
1749   /* create a supporting struct and attach it to pc */
1750   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1751   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1752   mg           = (PC_MG*)pc->data;
1753   mg->innerctx = pc_gamg;
1754 
1755   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1756 
1757   /* these should be in subctx but repartitioning needs simple arrays */
1758   pc_gamg->data_sz = 0;
1759   pc_gamg->data    = NULL;
1760 
1761   /* overwrite the function pointers of PCMG with those of the derived class PCGAMG */
1762   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1763   pc->ops->setup          = PCSetUp_GAMG;
1764   pc->ops->reset          = PCReset_GAMG;
1765   pc->ops->destroy        = PCDestroy_GAMG;
1766   mg->view                = PCView_GAMG;
1767 
1768   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1769   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1770   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1773   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPType_C",PCGAMGSetEstEigKSPType_GAMG);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPMaxIt_C",PCGAMGSetEstEigKSPMaxIt_GAMG);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1776   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1777   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1779   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1780   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1781   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1782   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1783   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1784   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1785   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1786   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1787   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1788   pc_gamg->repart           = PETSC_FALSE;
1789   pc_gamg->reuse_prol       = PETSC_FALSE;
1790   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1791   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1792   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1793   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1794   pc_gamg->min_eq_proc      = 50;
1795   pc_gamg->coarse_eq_limit  = 50;
1796   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1797   pc_gamg->threshold_scale = 1.;
1798   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1799   pc_gamg->current_level    = 0; /* don't need to init really */
1800   ierr = PetscStrcpy(pc_gamg->esteig_type,NULL);CHKERRQ(ierr);
1801   pc_gamg->esteig_max_it    = 10;
1802   pc_gamg->use_sa_esteig    = PETSC_TRUE;
1803   pc_gamg->emin             = 0;
1804   pc_gamg->emax             = 0;
1805 
1806   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1807 
1808   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1809   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 /*@C
1814  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1815     from PCInitializePackage().
1816 
1817  Level: developer
1818 
1819  .seealso: PetscInitialize()
1820 @*/
1821 PetscErrorCode PCGAMGInitializePackage(void)
1822 {
1823   PetscErrorCode ierr;
1824   PetscInt       l;
1825 
1826   PetscFunctionBegin;
1827   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1828   PCGAMGPackageInitialized = PETSC_TRUE;
1829   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1830   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1831   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1832   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1833 
1834   /* general events */
1835   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", 0, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1836   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1837   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1838   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1839   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1840   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1841   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1842 
1843   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1844   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1845   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1846   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1847   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1848   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1849   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1850   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1851   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1852   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1853   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1854   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1855   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1856   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1857   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1858   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1859   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1860   for (l=0;l<PETSC_MG_MAXLEVELS;l++) {
1861     char ename[32];
1862 
1863     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Squ l%02d",l);CHKERRQ(ierr);
1864     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][0]);CHKERRQ(ierr);
1865     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Gal l%02d",l);CHKERRQ(ierr);
1866     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][1]);CHKERRQ(ierr);
1867     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Opt l%02d",l);CHKERRQ(ierr);
1868     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][2]);CHKERRQ(ierr);
1869   }
1870   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1871   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1872   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1873   /* create timer stages */
1874 #if defined(GAMG_STAGES)
1875   {
1876     char     str[32];
1877     PetscInt lidx;
1878     ierr = PetscSNPrintf(str,sizeof(str),"MG Level %d (finest)",0);CHKERRQ(ierr);
1879     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1880     for (lidx=1; lidx<PETSC_MG_MAXLEVELS; lidx++) { /* gamg_stages has PETSC_MG_MAXLEVELS entries; do not hardwire the bound */
1881       ierr = PetscSNPrintf(str,sizeof(str),"MG Level %d",(int)lidx);CHKERRQ(ierr);
1882       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1883     }
1884   }
1885 #endif
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 /*@C
1890  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1891     called from PetscFinalize() automatically.
1892 
1893  Level: developer
1894 
1895  .seealso: PetscFinalize()
1896 @*/
1897 PetscErrorCode PCGAMGFinalizePackage(void)
1898 {
1899   PetscErrorCode ierr;
1900 
1901   PetscFunctionBegin;
1902   PCGAMGPackageInitialized = PETSC_FALSE;
1903   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 /*@C
1908  PCGAMGRegister - Register a PCGAMG implementation.
1909 
1910  Input Parameters:
1911  + type - string that will be used as the name of the GAMG type.
1912  - create - function for creating the gamg context.
1913 
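 Sample usage, a sketch in which MyGAMGCreate is a hypothetical user-provided constructor with signature PetscErrorCode MyGAMGCreate(PC):
.vb
   ierr = PCGAMGRegister("mygamg",MyGAMGCreate);CHKERRQ(ierr);
   ierr = PCGAMGSetType(pc,"mygamg");CHKERRQ(ierr);
.ve
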
1914   Level: advanced
1915 
1916  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1917 @*/
1918 PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
1919 {
1920   PetscErrorCode ierr;
1921 
1922   PetscFunctionBegin;
1923   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1924   ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
1925   PetscFunctionReturn(0);
1926 }
1927 
1928