xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision 57181aed8e0ad51defbf9cefc682654f57e169fa)
1 /*
2  GAMG geometric-algebric multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /* Hack to access same_local_solves */
7 #include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/
8 
9 #if defined(PETSC_HAVE_CUDA)
10   #include <cuda_runtime.h>
11 #endif
12 
13 #if defined(PETSC_HAVE_HIP)
14   #include <hip/hip_runtime.h>
15 #endif
16 
17 #if defined PETSC_GAMG_USE_LOG
18 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
19 #endif
20 
21 #if defined PETSC_USE_LOG
22 PetscLogEvent PC_GAMGGraph_AGG;
23 PetscLogEvent PC_GAMGGraph_GEO;
24 PetscLogEvent PC_GAMGCoarsen_AGG;
25 PetscLogEvent PC_GAMGCoarsen_GEO;
26 PetscLogEvent PC_GAMGProlongator_AGG;
27 PetscLogEvent PC_GAMGProlongator_GEO;
28 PetscLogEvent PC_GAMGOptProlongator_AGG;
29 #endif
30 
31 /* #define GAMG_STAGES */
32 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
33 static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
34 #endif
35 
36 static PetscFunctionList GAMGList = NULL;
37 static PetscBool PCGAMGPackageInitialized;
38 
39 /* ----------------------------------------------------------------------------- */
40 PetscErrorCode PCReset_GAMG(PC pc)
41 {
42   PetscErrorCode ierr, level;
43   PC_MG          *mg      = (PC_MG*)pc->data;
44   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
45 
46   PetscFunctionBegin;
47   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
48   pc_gamg->data_sz = 0;
49   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
50   for (level = 0; level < PETSC_MG_MAXLEVELS ; level++) {
51     mg->min_eigen_DinvA[level] = 0;
52     mg->max_eigen_DinvA[level] = 0;
53   }
54   pc_gamg->emin = 0;
55   pc_gamg->emax = 0;
56   PetscFunctionReturn(0);
57 }
58 
59 /* -------------------------------------------------------------------------- */
60 /*
61    PCGAMGCreateLevel_GAMG: create coarse op with RAP.  repartition and/or reduce number
62      of active processors.
63 
64    Input Parameter:
65    . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
66           'pc_gamg->data_sz' are changed via repartitioning/reduction.
67    . Amat_fine - matrix on this fine (k) level
68    . cr_bs - coarse block size
69    In/Output Parameter:
70    . a_P_inout - prolongation operator to the next level (k-->k-1)
71    . a_nactive_proc - number of active procs
72    Output Parameter:
73    . a_Amat_crs - coarse matrix that is created (k-1)
74 */
75 
76 static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
77 {
78   PetscErrorCode  ierr;
79   PC_MG           *mg         = (PC_MG*)pc->data;
80   PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
81   Mat             Cmat,Pold=*a_P_inout;
82   MPI_Comm        comm;
83   PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
84   PetscInt        ncrs_eq,ncrs,f_bs;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
88   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
89   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
90   ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
91   ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
92 
93   if (Pcolumnperm) *Pcolumnperm = NULL;
94 
95   /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
96   ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
97   if (pc_gamg->data_cell_rows>0) {
98     ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
99   } else {
100     PetscInt  bs;
101     ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
102     ncrs = ncrs_eq/bs;
103   }
104   /* get number of PEs to make active 'new_size', reduce, can be any integer 1-P */
105   if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* 0 turns reducing to 1 process/device on; do for HIP, etc. */
106 #if defined(PETSC_HAVE_CUDA)
107     PetscShmComm pshmcomm;
108     PetscMPIInt  locrank;
109     MPI_Comm     loccomm;
110     PetscInt     s_nnodes,r_nnodes, new_new_size;
111     cudaError_t  cerr;
112     int          devCount;
113     ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
114     ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
115     ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRQ(ierr);
116     s_nnodes = !locrank;
117     ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
118     if (size%r_nnodes) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of nodes np=%D nnodes%D",size,r_nnodes);
119     devCount = 0;
120     cerr = cudaGetDeviceCount(&devCount);
121     cudaGetLastError(); /* Reset the last error */
122     if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
123       new_new_size = r_nnodes * devCount;
124       new_size = new_new_size;
125       ierr = PetscInfo5(pc,"Fine grid with Cuda. %D nodes. Change new active set size %D --> %D (devCount=%D #nodes=%D)\n",r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
126     } else {
127       ierr = PetscInfo(pc,"With Cuda but no device. Use heuristics.");CHKERRQ(ierr);
128       goto HEURISTIC;
129     }
130 #else
131     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
132 #endif
133   } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
134     if (nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of active process %D wrt reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
135     new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
136     ierr = PetscInfo3(pc,"Manually setting reduction to %D active processes (%D/%D)\n",new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
137   } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
138     new_size = 1;
139     ierr = PetscInfo1(pc,"Force coarsest grid reduction to %D active processes\n",new_size);CHKERRQ(ierr);
140   } else {
141     PetscInt ncrs_eq_glob;
142 #if defined(PETSC_HAVE_CUDA)
143     HEURISTIC:
144 #endif
145     ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
146     new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
147     if (!new_size) new_size = 1; /* not likely, posible? */
148     else if (new_size >= nactive) new_size = nactive; /* no change, rare */
149     ierr = PetscInfo2(pc,"Coarse grid reduction from %D to %D active processes\n",nactive,new_size);CHKERRQ(ierr);
150   }
151   if (new_size==nactive) {
152     *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
153     if (new_size < size) {
154       /* odd case where multiple coarse grids are on one processor or no coarsening ... */
155       ierr = PetscInfo1(pc,"reduced grid using same number of processors (%D) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
156       if (pc_gamg->cpu_pin_coarse_grids) {
157         ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
158         ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
159       }
160     }
161     /* we know that the grid structure can be reused in MatPtAP */
162   } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
163     PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
164     IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
165     nloc_old = ncrs_eq/cr_bs;
166     if (ncrs_eq % cr_bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
167     /* get new_size and rfactor */
168     if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
169       /* find factor */
170       if (new_size == 1) rfactor = size; /* don't modify */
171       else {
172         PetscReal best_fact = 0.;
173         jj = -1;
174         for (kk = 1 ; kk <= size ; kk++) {
175           if (!(size%kk)) { /* a candidate */
176             PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
177             if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
178             if (fact > best_fact) {
179               best_fact = fact; jj = kk;
180             }
181           }
182         }
183         if (jj != -1) rfactor = jj;
184         else rfactor = 1; /* a prime */
185         if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
186         else expand_factor = rfactor;
187       }
188       new_size = size/rfactor; /* make new size one that is factor */
189       if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
190         *a_Amat_crs = Cmat;
191         ierr = PetscInfo2(pc,"Finding factorable processor set stopped reduction: new_size=%D, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
192         PetscFunctionReturn(0);
193       }
194     }
195 #if defined PETSC_GAMG_USE_LOG
196     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
197 #endif
198     /* make 'is_eq_newproc' */
199     ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
200     if (pc_gamg->repart) {
201       /* Repartition Cmat_{k} and move colums of P^{k}_{k-1} and coordinates of primal part accordingly */
202       Mat      adj;
203       ierr = PetscInfo4(pc,"Repartition: size (active): %D --> %D, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
204       /* get 'adj' */
205       if (cr_bs == 1) {
206         ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
207       } else {
208         /* make a scalar matrix to partition (no Stokes here) */
209         Mat               tMat;
210         PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
211         const PetscScalar *vals;
212         const PetscInt    *idx;
213         PetscInt          *d_nnz, *o_nnz, M, N;
214         static PetscInt   llev = 0; /* ugly but just used for debugging */
215         MatType           mtype;
216 
217         ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
218         ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
219         ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
220         for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
221           ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
222           d_nnz[jj] = ncols/cr_bs;
223           o_nnz[jj] = ncols/cr_bs;
224           ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
225           if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
226           if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
227         }
228 
229         ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
230         ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
231         ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
232         ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
233         ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
234         ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
235         ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
236 
237         for (ii = Istart_crs; ii < Iend_crs; ii++) {
238           PetscInt dest_row = ii/cr_bs;
239           ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
240           for (jj = 0; jj < ncols; jj++) {
241             PetscInt    dest_col = idx[jj]/cr_bs;
242             PetscScalar v        = 1.0;
243             ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
244           }
245           ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
246         }
247         ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248         ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
249 
250         if (llev++ == -1) {
251           PetscViewer viewer; char fname[32];
252           ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
253           PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);
254           ierr = MatView(tMat, viewer);CHKERRQ(ierr);
255           ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
256         }
257         ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
258         ierr = MatDestroy(&tMat);CHKERRQ(ierr);
259       } /* create 'adj' */
260 
261       { /* partition: get newproc_idx */
262         char            prefix[256];
263         const char      *pcpre;
264         const PetscInt  *is_idx;
265         MatPartitioning mpart;
266         IS              proc_is;
267 
268         ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
269         ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
270         ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
271         ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
272         ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
273         ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
274         ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
275         ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
276         ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
277 
278         /* collect IS info */
279         ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
280         ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
281         for (kk = jj = 0 ; kk < nloc_old ; kk++) {
282           for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
283             newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
284           }
285         }
286         ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
287         ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
288       }
289       ierr = MatDestroy(&adj);CHKERRQ(ierr);
290 
291       ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
292       ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
293     } else { /* simple aggregation of parts -- 'is_eq_newproc' */
294       PetscInt targetPE;
295       if (new_size==nactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
296       ierr = PetscInfo1(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
297       targetPE = (rank/rfactor)*expand_factor;
298       ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
299     } /* end simple 'is_eq_newproc' */
300 
301     /*
302       Create an index set from the is_eq_newproc index set to indicate the mapping TO
303     */
304     ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
305     is_eq_num_prim = is_eq_num;
306     /*
307       Determine how many equations/vertices are assigned to each processor
308     */
309     ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
310     ncrs_eq_new = counts[rank];
311     ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
312     ncrs_new = ncrs_eq_new/cr_bs;
313 
314     ierr = PetscFree(counts);CHKERRQ(ierr);
315     /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxilary data into some complex abstracted thing */
316     {
317       Vec            src_crd, dest_crd;
318       const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
319       VecScatter     vecscat;
320       PetscScalar    *array;
321       IS isscat;
322       /* move data (for primal equations only) */
323       /* Create a vector to contain the newly ordered element information */
324       ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
325       ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
326       ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
327       /*
328         There are 'ndata_rows*ndata_cols' data items per node, (one can think of the vectors of having
329         a block size of ...).  Note, ISs are expanded into equation space by 'cr_bs'.
330       */
331       ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
332       ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
333       for (ii=0,jj=0; ii<ncrs; ii++) {
334         PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
335         for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
336       }
337       ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
338       ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
339       ierr = PetscFree(tidx);CHKERRQ(ierr);
340       /*
341         Create a vector to contain the original vertex information for each element
342       */
343       ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
344       for (jj=0; jj<ndata_cols; jj++) {
345         const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
346         for (ii=0; ii<ncrs; ii++) {
347           for (kk=0; kk<ndata_rows; kk++) {
348             PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
349             PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
350             ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
351           }
352         }
353       }
354       ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
355       ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
356       /*
357         Scatter the element vertex information (still in the original vertex ordering)
358         to the correct processor
359       */
360       ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
361       ierr = ISDestroy(&isscat);CHKERRQ(ierr);
362       ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
363       ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
364       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
365       ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
366       /*
367         Put the element vertex data into a new allocation of the gdata->ele
368       */
369       ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
370       ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);
371 
372       pc_gamg->data_sz = node_data_sz*ncrs_new;
373       strideNew        = ncrs_new*ndata_rows;
374 
375       ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
376       for (jj=0; jj<ndata_cols; jj++) {
377         for (ii=0; ii<ncrs_new; ii++) {
378           for (kk=0; kk<ndata_rows; kk++) {
379             PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
380             pc_gamg->data[ix] = PetscRealPart(array[jx]);
381           }
382         }
383       }
384       ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
385       ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
386     }
387     /* move A and P (columns) with new layout */
388 #if defined PETSC_GAMG_USE_LOG
389     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
390 #endif
391     /*
392       Invert for MatCreateSubMatrix
393     */
394     ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
395     ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
396     ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
397     if (is_eq_num != is_eq_num_prim) {
398       ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
399     }
400     if (Pcolumnperm) {
401       ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
402       *Pcolumnperm = new_eq_indices;
403     }
404     ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
405 #if defined PETSC_GAMG_USE_LOG
406     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
407     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
408 #endif
409     /* 'a_Amat_crs' output */
410     {
411       Mat mat;
412       ierr        = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
413       *a_Amat_crs = mat;
414     }
415     ierr = MatDestroy(&Cmat);CHKERRQ(ierr);
416 
417 #if defined PETSC_GAMG_USE_LOG
418     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
419 #endif
420     /* prolongator */
421     {
422       IS       findices;
423       PetscInt Istart,Iend;
424       Mat      Pnew;
425 
426       ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
427 #if defined PETSC_GAMG_USE_LOG
428       ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
429 #endif
430       ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
431       ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
432       ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
433       ierr = ISDestroy(&findices);CHKERRQ(ierr);
434 
435 #if defined PETSC_GAMG_USE_LOG
436       ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
437 #endif
438       ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);
439 
440       /* output - repartitioned */
441       *a_P_inout = Pnew;
442     }
443     ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);
444 
445     *a_nactive_proc = new_size; /* output */
446 
447     /* pinning on reduced grids, not a bad heuristic and optimization gets folded into process reduction optimization */
448     if (pc_gamg->cpu_pin_coarse_grids) {
449 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
450       static PetscInt llev = 2;
451       ierr = PetscInfo1(pc,"Pinning level %D to the CPU\n",llev++);CHKERRQ(ierr);
452 #endif
453       ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
454       ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
455       if (1) { /* lvec is created, need to pin it, this is done in MatSetUpMultiply_MPIAIJ. Hack */
456         Mat         A = *a_Amat_crs, P = *a_P_inout;
457         PetscMPIInt size;
458         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
459         if (size > 1) {
460           Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
461           ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
462           ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
463         }
464       }
465     }
466 #if defined PETSC_GAMG_USE_LOG
467     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
468 #endif
469   }
470   PetscFunctionReturn(0);
471 }
472 
473 PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
474 {
475   PetscErrorCode ierr;
476   const char     *prefix;
477   char           addp[32];
478   PC_MG          *mg      = (PC_MG*)a_pc->data;
479   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
480 
481   PetscFunctionBegin;
482   ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
483   ierr = PetscInfo1(a_pc,"Square Graph on level %D\n",pc_gamg->current_level+1);CHKERRQ(ierr);
484 #if defined(PETSC_HAVE_CUDA)
485   ierr = MatAIJCUSPARSESetGenerateTranspose(Gmat1,PETSC_TRUE);CHKERRQ(ierr);
486 #endif
487   ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
488   ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
489   ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",pc_gamg->current_level);CHKERRQ(ierr);
490   ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
491   /* TODO: if we know the matrix is symmetric we can pass MATPRODUCT_AB */
492   ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
493   ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
494   ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
495   /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
496   (*Gmat2)->assembled = PETSC_TRUE;
497   PetscFunctionReturn(0);
498 }
499 
500 /* -------------------------------------------------------------------------- */
501 /*
502    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
503                     by setting data structures and options.
504 
505    Input Parameter:
506 .  pc - the preconditioner context
507 
508 */
509 PetscErrorCode PCSetUp_GAMG(PC pc)
510 {
511   PetscErrorCode ierr;
512   PC_MG          *mg      = (PC_MG*)pc->data;
513   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
514   Mat            Pmat     = pc->pmat;
515   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
516   MPI_Comm       comm;
517   PetscMPIInt    rank,size,nactivepe;
518   Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
519   IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
520   PetscLogDouble nnz0=0.,nnztot=0.;
521   MatInfo        info;
522   PetscBool      is_last = PETSC_FALSE;
523 
524   PetscFunctionBegin;
525   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
526   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
527   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
528 
529   if (pc->setupcalled) {
530     if (!pc_gamg->reuse_prol || pc->flag == DIFFERENT_NONZERO_PATTERN) {
531       /* reset everything */
532       ierr = PCReset_MG(pc);CHKERRQ(ierr);
533       pc->setupcalled = 0;
534     } else {
535       PC_MG_Levels **mglevels = mg->levels;
536       /* just do Galerkin grids */
537       Mat          B,dA,dB;
538 
539       if (pc_gamg->Nlevels > 1) {
540         /* currently only handle case where mat and pmat are the same on coarser levels */
541         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
542         /* (re)set to get dirty flag */
543         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
544 
545         for (level=pc_gamg->Nlevels-2; level>=0; level--) {
546           MatReuse reuse = MAT_INITIAL_MATRIX ;
547 
548           /* matrix structure can change from repartitioning or process reduction but don't know if we have process reduction here. Should fix */
549           ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
550           if (B->product) {
551             if (B->product->A == dB && B->product->B == mglevels[level+1]->interpolate) {
552               reuse = MAT_REUSE_MATRIX;
553             }
554           }
555           if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr); }
556           if (reuse == MAT_REUSE_MATRIX) {
557             ierr = PetscInfo1(pc,"RAP after first solve, reuse matrix level %D\n",level);CHKERRQ(ierr);
558           } else {
559             ierr = PetscInfo1(pc,"RAP after first solve, new matrix level %D\n",level);CHKERRQ(ierr);
560           }
561           ierr = MatPtAP(dB,mglevels[level+1]->interpolate,reuse,PETSC_DEFAULT,&B);CHKERRQ(ierr);
562           mglevels[level]->A = B;
563           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
564           dB   = B;
565         }
566       }
567 
568       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
569       PetscFunctionReturn(0);
570     }
571   }
572 
573   if (!pc_gamg->data) {
574     if (pc_gamg->orig_data) {
575       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
576       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
577 
578       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
579       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
580       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
581 
582       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
583       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
584     } else {
585       if (!pc_gamg->ops->createdefaultdata) SETERRQ(comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
586       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
587     }
588   }
589 
590   /* cache original data for reuse */
591   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
592     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
593     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
594     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
595     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
596   }
597 
598   /* get basic dims */
599   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
600   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
601 
602   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
603   nnz0   = info.nz_used;
604   nnztot = info.nz_used;
605   ierr = PetscInfo6(pc,"level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
606 
607   /* Get A_i and R_i */
608   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
609     pc_gamg->current_level = level;
610     if (level >= PETSC_MG_MAXLEVELS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
611     level1 = level + 1;
612 #if defined PETSC_GAMG_USE_LOG
613     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
614 #if (defined GAMG_STAGES)
615     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
616 #endif
617 #endif
618     { /* construct prolongator */
619       Mat              Gmat;
620       PetscCoarsenData *agg_lists;
621       Mat              Prol11;
622 
623       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
624       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
625       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
626 
627       /* could have failed to create new level */
628       if (Prol11) {
629         const char *prefix;
630         char       addp[32];
631 
632         /* get new block size of coarse matrices */
633         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
634 
635         if (pc_gamg->ops->optprolongator) {
636           /* smooth */
637           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
638         }
639 
640         if (pc_gamg->use_aggs_in_asm) {
641           PetscInt bs;
642           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
643           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
644         }
645 
646         ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
647         ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
648         ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
649         ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
650         /* Always generate the transpose with CUDA
651            Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
652 #if defined(PETSC_HAVE_CUDA)
653         {
654           PetscBool ismpiaij;
655 
656           ierr = PetscObjectBaseTypeCompare((PetscObject)Prol11,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
657           if (ismpiaij) {
658             Mat Prol_d,Prol_o;
659 
660             ierr = MatMPIAIJGetSeqAIJ(Prol11,&Prol_d,&Prol_o,NULL);CHKERRQ(ierr);
661             ierr = MatSeqAIJCUSPARSESetGenerateTranspose(Prol_d,PETSC_TRUE);CHKERRQ(ierr);
662             ierr = MatSeqAIJCUSPARSESetGenerateTranspose(Prol_o,PETSC_TRUE);CHKERRQ(ierr);
663           } else {
664             ierr = MatSeqAIJCUSPARSESetGenerateTranspose(Prol11,PETSC_TRUE);CHKERRQ(ierr);
665           }
666         }
667 #endif
668         ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
669         Parr[level1] = Prol11;
670       } else Parr[level1] = NULL; /* failed to coarsen */
671 
672       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
673       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
674     } /* construct prolongator scope */
675 #if defined PETSC_GAMG_USE_LOG
676     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
677 #endif
678     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
679     if (!Parr[level1]) { /* failed to coarsen */
680       ierr =  PetscInfo1(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
681 #if defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES
682       ierr = PetscLogStagePop();CHKERRQ(ierr);
683 #endif
684       break;
685     }
686 #if defined PETSC_GAMG_USE_LOG
687     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
688 #endif
689     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is next M, a loop test variables */
690     if (is_last) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Is last ?");
691     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
692     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
693     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
694 
695 #if defined PETSC_GAMG_USE_LOG
696     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
697 #endif
698     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is loop test variables */
699     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
700     nnztot += info.nz_used;
701     ierr = PetscInfo5(pc,"%D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
702 
703 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
704     ierr = PetscLogStagePop();CHKERRQ(ierr);
705 #endif
706     /* stop if one node or one proc -- could pull back for singular problems */
707     if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
708       ierr =  PetscInfo2(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
709       level++;
710       break;
711     }
712   } /* levels */
713   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
714 
715   ierr = PetscInfo2(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
716   pc_gamg->Nlevels = level + 1;
717   fine_level       = level;
718   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
719 
720   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
721     PetscErrorCode (*savesetfromoptions[PETSC_MG_MAXLEVELS])(PetscOptionItems*,KSP);
722 
723     /* set default smoothers & set operators */
724     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
725       KSP smoother;
726       PC  subpc;
727 
728       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
729       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
730 
731       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
732       /* set ops */
733       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
734       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
735 
736       /* set defaults */
737       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
738 
739       /* set blocks for ASM smoother that uses the 'aggregates' */
740       if (pc_gamg->use_aggs_in_asm) {
741         PetscInt sz;
742         IS       *iss;
743 
744         sz   = nASMBlocksArr[level];
745         iss  = ASMLocalIDsArr[level];
746         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
747         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
748         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
749         if (!sz) {
750           IS       is;
751           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
752           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
753           ierr = ISDestroy(&is);CHKERRQ(ierr);
754         } else {
755           PetscInt kk;
756           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
757           for (kk=0; kk<sz; kk++) {
758             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
759           }
760           ierr = PetscFree(iss);CHKERRQ(ierr);
761         }
762         ASMLocalIDsArr[level] = NULL;
763         nASMBlocksArr[level]  = 0;
764       } else {
765         ierr = PCSetType(subpc, PCSOR);CHKERRQ(ierr);
766       }
767     }
768     {
769       /* coarse grid */
770       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
771       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
772 
773       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
774       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
775       if (!pc_gamg->use_parallel_coarse_grid_solver) {
776         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
777         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
778         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
779         ierr = PCSetUp(subpc);CHKERRQ(ierr);
780         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
781         if (ii != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
782         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
783         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
784         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
785         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
786         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
787         /* This flag gets reset by PCBJacobiGetSubKSP(), but our BJacobi really does the same algorithm everywhere (and in
788          * fact, all but one process will have zero dofs), so we reset the flag to avoid having PCView_BJacobi attempt to
789          * view every subdomain as though they were different. */
790         ((PC_BJacobi*)subpc->data)->same_local_solves = PETSC_TRUE;
791       }
792     }
793 
794     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
795     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
796     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
797     ierr = PetscOptionsEnd();CHKERRQ(ierr);
798     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
799 
800     /* setup cheby eigen estimates from SA */
801     if (pc_gamg->use_sa_esteig==1) {
802       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
803         KSP       smoother;
804         PetscBool ischeb;
805 
806         savesetfromoptions[level] = NULL;
807         ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
808         ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
809         if (ischeb) {
810           KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;
811 
812           ierr = KSPSetFromOptions(smoother);CHKERRQ(ierr); /* let command line emax override using SA's eigenvalues */
813           if (mg->max_eigen_DinvA[level] > 0 && cheb->emax == 0.) {
814             PC        subpc;
815             PetscBool isjac;
816             ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
817             ierr = PetscObjectTypeCompare((PetscObject)subpc,PCJACOBI,&isjac);CHKERRQ(ierr);
818             if (isjac && pc_gamg->use_sa_esteig==1) {
819               PetscReal emax,emin;
820 
821               emin = mg->min_eigen_DinvA[level];
822               emax = mg->max_eigen_DinvA[level];
823               ierr = PetscInfo4(pc,"PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
824               cheb->emin_computed = emin;
825               cheb->emax_computed = emax;
826               ierr = KSPChebyshevSetEigenvalues(smoother, cheb->tform[2]*emin + cheb->tform[3]*emax, cheb->tform[0]*emin + cheb->tform[1]*emax);CHKERRQ(ierr);
827 
828               /* We have set the eigenvalues and consumed the transformation values
829                  prevent from flagging the recomputation of the eigenvalues again in PCSetUp_MG
830                  below when setfromoptions will be called again */
831               savesetfromoptions[level] = smoother->ops->setfromoptions;
832               smoother->ops->setfromoptions = NULL;
833             }
834           }
835         }
836       }
837     }
838 
839     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
840 
841     /* restore Chebyshev smoother for next calls */
842     if (pc_gamg->use_sa_esteig==1) {
843       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
844         if (savesetfromoptions[level]) {
845           KSP smoother;
846           ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
847           smoother->ops->setfromoptions = savesetfromoptions[level];
848         }
849       }
850     }
851 
852     /* clean up */
853     for (level=1; level<pc_gamg->Nlevels; level++) {
854       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
855       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
856     }
857   } else {
858     KSP smoother;
859 
860     ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
861     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
862     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
863     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
864     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
865   }
866   PetscFunctionReturn(0);
867 }
868 
869 /* ------------------------------------------------------------------------- */
870 /*
871  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
872    that was created with PCCreate_GAMG().
873 
874    Input Parameter:
875 .  pc - the preconditioner context
876 
877    Application Interface Routine: PCDestroy()
878 */
879 PetscErrorCode PCDestroy_GAMG(PC pc)
880 {
881   PetscErrorCode ierr;
882   PC_MG          *mg     = (PC_MG*)pc->data;
883   PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;
884 
885   PetscFunctionBegin;
886   ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
887   if (pc_gamg->ops->destroy) {
888     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
891   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
892   ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
893   ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
894   PetscFunctionReturn(0);
895 }
896 
897 /*@
898    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
899 
900    Logically Collective on PC
901 
902    Input Parameters:
903 +  pc - the preconditioner context
904 -  n - the number of equations
905 
906 
907    Options Database Key:
908 .  -pc_gamg_process_eq_limit <limit>
909 
910    Notes:
911     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
912           that has degrees of freedom
913 
914    Level: intermediate
915 
916 .seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
917 @*/
918 PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
919 {
920   PetscErrorCode ierr;
921 
922   PetscFunctionBegin;
923   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
924   ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
929 {
930   PC_MG   *mg      = (PC_MG*)pc->data;
931   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
932 
933   PetscFunctionBegin;
934   if (n>0) pc_gamg->min_eq_proc = n;
935   PetscFunctionReturn(0);
936 }
937 
938 /*@
939    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
940 
941  Collective on PC
942 
943    Input Parameters:
944 +  pc - the preconditioner context
945 -  n - maximum number of equations to aim for
946 
947    Options Database Key:
948 .  -pc_gamg_coarse_eq_limit <limit>
949 
950    Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
951      has less than 1000 unknowns.
952 
953    Level: intermediate
954 
955 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
956 @*/
957 PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
958 {
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
963   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
964   PetscFunctionReturn(0);
965 }
966 
967 static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
968 {
969   PC_MG   *mg      = (PC_MG*)pc->data;
970   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
971 
972   PetscFunctionBegin;
973   if (n>0) pc_gamg->coarse_eq_limit = n;
974   PetscFunctionReturn(0);
975 }
976 
977 /*@
978    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
979 
980    Collective on PC
981 
982    Input Parameters:
983 +  pc - the preconditioner context
984 -  n - PETSC_TRUE or PETSC_FALSE
985 
986    Options Database Key:
987 .  -pc_gamg_repartition <true,false>
988 
989    Notes:
990     this will generally improve the loading balancing of the work on each level
991 
992    Level: intermediate
993 
994 .seealso: ()
995 @*/
996 PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
997 {
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1002   ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
1007 {
1008   PC_MG   *mg      = (PC_MG*)pc->data;
1009   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1010 
1011   PetscFunctionBegin;
1012   pc_gamg->repart = n;
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 /*@
1017    PCGAMGSetEstEigKSPMaxIt - Set number of KSP iterations in eigen estimator (for Cheby)
1018 
1019    Collective on PC
1020 
1021    Input Parameters:
1022 +  pc - the preconditioner context
1023 -  n - number of its
1024 
1025    Options Database Key:
1026 .  -pc_gamg_esteig_ksp_max_it <its>
1027 
1028    Notes:
1029 
1030    Level: intermediate
1031 
1032 .seealso: ()
1033 @*/
1034 PetscErrorCode PCGAMGSetEstEigKSPMaxIt(PC pc, PetscInt n)
1035 {
1036   PetscErrorCode ierr;
1037 
1038   PetscFunctionBegin;
1039   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1040   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPMaxIt_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 static PetscErrorCode PCGAMGSetEstEigKSPMaxIt_GAMG(PC pc, PetscInt n)
1045 {
1046   PC_MG   *mg      = (PC_MG*)pc->data;
1047   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1048 
1049   PetscFunctionBegin;
1050   pc_gamg->esteig_max_it = n;
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 /*@
1055    PCGAMGSetUseSAEstEig - Use eigen estimate from smoothed aggregation for Cheby smoother
1056 
1057    Collective on PC
1058 
1059    Input Parameters:
1060 +  pc - the preconditioner context
1061 -  n - number of its
1062 
1063    Options Database Key:
1064 .  -pc_gamg_use_sa_esteig <true,false>
1065 
1066    Notes:
1067 
1068    Level: intermediate
1069 
1070 .seealso: ()
1071 @*/
1072 PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
1073 {
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1078   ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
1083 {
1084   PC_MG   *mg      = (PC_MG*)pc->data;
1085   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1086 
1087   PetscFunctionBegin;
1088   pc_gamg->use_sa_esteig = n ? 1 : 0;
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /*@C
1093    PCGAMGSetEstEigKSPType - Set type of KSP in eigen estimator (for Cheby)
1094 
1095    Collective on PC
1096 
1097    Input Parameters:
1098 +  pc - the preconditioner context
1099 -  t - ksp type
1100 
1101    Options Database Key:
1102 .  -pc_gamg_esteig_ksp_type <type>
1103 
1104    Notes:
1105 
1106    Level: intermediate
1107 
1108 .seealso: ()
1109 @*/
1110 PetscErrorCode PCGAMGSetEstEigKSPType(PC pc, char t[])
1111 {
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1116   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPType_C",(PC,char[]),(pc,t));CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 static PetscErrorCode PCGAMGSetEstEigKSPType_GAMG(PC pc, char t[])
1121 {
1122   PetscErrorCode ierr;
1123   PC_MG   *mg      = (PC_MG*)pc->data;
1124   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1125 
1126   PetscFunctionBegin;
1127   ierr = PetscStrcpy(pc_gamg->esteig_type,t);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 /*@
1132    PCGAMGSetEigenvalues - Set eigenvalues
1133 
1134    Collective on PC
1135 
1136    Input Parameters:
1137 +  pc - the preconditioner context
1138 -  emax - max eigenvalue
1139 .  emin - min eigenvalue
1140 
1141    Options Database Key:
1142 .  -pc_gamg_eigenvalues
1143 
1144    Level: intermediate
1145 
1146 .seealso: PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPType()
1147 @*/
1148 PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
1149 {
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1154   ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
1155   PetscFunctionReturn(0);
1156 }
1157 static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
1158 {
1159   PC_MG          *mg      = (PC_MG*)pc->data;
1160   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1161 
1162   PetscFunctionBegin;
1163   if (emax <= emin) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
1164   if (emax*emin <= 0.0) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
1165   pc_gamg->emax = emax;
1166   pc_gamg->emin = emin;
1167 
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 /*@
1172    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
1173 
1174    Collective on PC
1175 
1176    Input Parameters:
1177 +  pc - the preconditioner context
1178 -  n - PETSC_TRUE or PETSC_FALSE
1179 
1180    Options Database Key:
1181 .  -pc_gamg_reuse_interpolation <true,false>
1182 
1183    Level: intermediate
1184 
1185    Notes:
1186     this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
1187           rebuilding the preconditioner quicker.
1188 
1189 .seealso: ()
1190 @*/
1191 PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
1192 {
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1197   ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
1202 {
1203   PC_MG   *mg      = (PC_MG*)pc->data;
1204   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1205 
1206   PetscFunctionBegin;
1207   pc_gamg->reuse_prol = n;
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 /*@
1212    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
1213 
1214    Collective on PC
1215 
1216    Input Parameters:
1217 +  pc - the preconditioner context
1218 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
1219 
1220    Options Database Key:
1221 .  -pc_gamg_asm_use_agg
1222 
1223    Level: intermediate
1224 
1225 .seealso: ()
1226 @*/
1227 PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
1228 {
1229   PetscErrorCode ierr;
1230 
1231   PetscFunctionBegin;
1232   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1233   ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
1238 {
1239   PC_MG   *mg      = (PC_MG*)pc->data;
1240   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1241 
1242   PetscFunctionBegin;
1243   pc_gamg->use_aggs_in_asm = flg;
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 /*@
1248    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
1249 
1250    Collective on PC
1251 
1252    Input Parameters:
1253 +  pc - the preconditioner context
1254 -  flg - PETSC_TRUE to not force coarse grid onto one processor
1255 
1256    Options Database Key:
1257 .  -pc_gamg_use_parallel_coarse_grid_solver
1258 
1259    Level: intermediate
1260 
1261 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
1262 @*/
1263 PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
1264 {
1265   PetscErrorCode ierr;
1266 
1267   PetscFunctionBegin;
1268   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1269   ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
1274 {
1275   PC_MG   *mg      = (PC_MG*)pc->data;
1276   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1277 
1278   PetscFunctionBegin;
1279   pc_gamg->use_parallel_coarse_grid_solver = flg;
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 /*@
1284    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
1285 
1286    Collective on PC
1287 
1288    Input Parameters:
1289 +  pc - the preconditioner context
1290 -  flg - PETSC_TRUE to pin coarse grids to CPU
1291 
1292    Options Database Key:
1293 .  -pc_gamg_cpu_pin_coarse_grids
1294 
1295    Level: intermediate
1296 
1297 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
1298 @*/
1299 PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
1300 {
1301   PetscErrorCode ierr;
1302 
1303   PetscFunctionBegin;
1304   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1305   ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
1310 {
1311   PC_MG   *mg      = (PC_MG*)pc->data;
1312   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1313 
1314   PetscFunctionBegin;
1315   pc_gamg->cpu_pin_coarse_grids = flg;
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 /*@
1320    PCGAMGSetCoarseGridLayoutType - place reduce grids on processors with natural order (compact type)
1321 
1322    Collective on PC
1323 
1324    Input Parameters:
1325 +  pc - the preconditioner context
1326 -  flg - Layout type
1327 
1328    Options Database Key:
1329 .  -pc_gamg_coarse_grid_layout_type
1330 
1331    Level: intermediate
1332 
1333 .seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
1334 @*/
1335 PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
1336 {
1337   PetscErrorCode ierr;
1338 
1339   PetscFunctionBegin;
1340   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1341   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
1346 {
1347   PC_MG   *mg      = (PC_MG*)pc->data;
1348   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1349 
1350   PetscFunctionBegin;
1351   pc_gamg->layout_type = flg;
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 /*@
1356    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1357 
1358    Not collective on PC
1359 
1360    Input Parameters:
1361 +  pc - the preconditioner
1362 -  n - the maximum number of levels to use
1363 
1364    Options Database Key:
1365 .  -pc_mg_levels
1366 
1367    Level: intermediate
1368 
1369 .seealso: ()
1370 @*/
1371 PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
1372 {
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1377   ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1382 {
1383   PC_MG   *mg      = (PC_MG*)pc->data;
1384   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1385 
1386   PetscFunctionBegin;
1387   pc_gamg->Nlevels = n;
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 /*@
1392    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1393 
1394    Not collective on PC
1395 
1396    Input Parameters:
1397 +  pc - the preconditioner context
1398 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1399 -  n - number of threshold values provided in array
1400 
1401    Options Database Key:
1402 .  -pc_gamg_threshold <threshold>
1403 
1404    Notes:
1405     Increasing the threshold decreases the rate of coarsening. Conversely reducing the threshold increases the rate of coarsening (aggressive coarsening) and thereby reduces the complexity of the coarse grids, and generally results in slower solver converge rates. Reducing coarse grid complexity reduced the complexity of Galerkin coarse grid construction considerably.
1406     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, and thus reducing the coupling in the graph and a different (perhaps better) coarser set of points.
1407 
1408     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1409     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1410     If n is greater than the total number of levels, the excess entries in threshold will not be used.
1411 
1412    Level: intermediate
1413 
1414 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1415 @*/
1416 PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
1417 {
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1422   if (n) PetscValidRealPointer(v,2);
1423   ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1428 {
1429   PC_MG   *mg      = (PC_MG*)pc->data;
1430   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1431   PetscInt i;
1432   PetscFunctionBegin;
1433   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1434   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 /*@
1439    PCGAMGSetRankReductionFactors - Set manual schedual for process reduction on coarse grids
1440 
1441    Collective on PC
1442 
1443    Input Parameters:
1444 +  pc - the preconditioner context
1445 .  v - array of reduction factors. 0 for fist value forces a reduction to one process/device on first level in Cuda
1446 -  n - number of values provided in array
1447 
1448    Options Database Key:
1449 .  -pc_gamg_rank_reduction_factors <factors>
1450 
1451    Level: intermediate
1452 
1453 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1454 @*/
1455 PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
1456 {
1457   PetscErrorCode ierr;
1458 
1459   PetscFunctionBegin;
1460   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1461   if (n) PetscValidIntPointer(v,2);
1462   ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
1467 {
1468   PC_MG   *mg      = (PC_MG*)pc->data;
1469   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1470   PetscInt i;
1471   PetscFunctionBegin;
1472   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
1473   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* 0 stop putting one process/device on first level */
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 /*@
1478    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1479 
1480    Not collective on PC
1481 
1482    Input Parameters:
1483 +  pc - the preconditioner context
1484 -  scale - the threshold value reduction, ussually < 1.0
1485 
1486    Options Database Key:
1487 .  -pc_gamg_threshold_scale <v>
1488 
1489    Notes:
1490    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1491    This scaling is used for each subsequent coarsening, but must be called before PCGAMGSetThreshold().
1492 
1493    Level: advanced
1494 
1495 .seealso: PCGAMGSetThreshold()
1496 @*/
1497 PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
1498 {
1499   PetscErrorCode ierr;
1500 
1501   PetscFunctionBegin;
1502   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1503   ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1508 {
1509   PC_MG   *mg      = (PC_MG*)pc->data;
1510   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1511   PetscFunctionBegin;
1512   pc_gamg->threshold_scale = v;
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 /*@C
1517    PCGAMGSetType - Set solution method
1518 
1519    Collective on PC
1520 
1521    Input Parameters:
1522 +  pc - the preconditioner context
1523 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1524 
1525    Options Database Key:
1526 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1527 
1528    Level: intermediate
1529 
1530 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1531 @*/
1532 PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
1533 {
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1538   ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 /*@C
1543    PCGAMGGetType - Get solution method
1544 
1545    Collective on PC
1546 
1547    Input Parameter:
1548 .  pc - the preconditioner context
1549 
1550    Output Parameter:
1551 .  type - the type of algorithm used
1552 
1553    Level: intermediate
1554 
1555 .seealso: PCGAMGSetType(), PCGAMGType
1556 @*/
1557 PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
1558 {
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1563   ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1568 {
1569   PC_MG          *mg      = (PC_MG*)pc->data;
1570   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1571 
1572   PetscFunctionBegin;
1573   *type = pc_gamg->type;
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
1578 {
1579   PetscErrorCode ierr,(*r)(PC);
1580   PC_MG          *mg      = (PC_MG*)pc->data;
1581   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1582 
1583   PetscFunctionBegin;
1584   pc_gamg->type = type;
1585   ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
1586   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
1587   if (pc_gamg->ops->destroy) {
1588     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
1589     ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
1590     pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1591     /* cleaning up common data in pc_gamg - this should disapear someday */
1592     pc_gamg->data_cell_cols = 0;
1593     pc_gamg->data_cell_rows = 0;
1594     pc_gamg->orig_data_cell_cols = 0;
1595     pc_gamg->orig_data_cell_rows = 0;
1596     ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
1597     pc_gamg->data_sz = 0;
1598   }
1599   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
1600   ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
1601   ierr = (*r)(pc);CHKERRQ(ierr);
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 /* -------------------------------------------------------------------------- */
1606 /*
1607    PCMGGetGridComplexity - compute coarse grid complexity of MG hierarchy
1608 
1609    Input Parameter:
1610 .  pc - the preconditioner context
1611 
1612    Output Parameter:
1613 .  gc - grid complexity = sum_i(nnz_i) / nnz_0
1614 
1615    Level: advanced
1616 */
1617 static PetscErrorCode PCMGGetGridComplexity(PC pc, PetscReal *gc)
1618 {
1619   PetscErrorCode ierr;
1620   PC_MG          *mg      = (PC_MG*)pc->data;
1621   PC_MG_Levels   **mglevels = mg->levels;
1622   PetscInt       lev;
1623   PetscLogDouble nnz0 = 0, sgc = 0;
1624   MatInfo        info;
1625 
1626   PetscFunctionBegin;
1627   if (!pc->setupcalled) {
1628     *gc = 0;
1629     PetscFunctionReturn(0);
1630   }
1631   if (!mg->nlevels) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MG has no levels");
1632   for (lev=0; lev<mg->nlevels; lev++) {
1633     Mat dB;
1634     ierr = KSPGetOperators(mglevels[lev]->smoothd,NULL,&dB);CHKERRQ(ierr);
1635     ierr = MatGetInfo(dB,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
1636     sgc += info.nz_used;
1637     if (lev==mg->nlevels-1) nnz0 = info.nz_used;
1638   }
1639   if (nnz0 > 0) *gc = (PetscReal)(sgc/nnz0);
1640   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number for grid points on finest level is not available");
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1645 {
1646   PetscErrorCode ierr,i;
1647   PC_MG          *mg      = (PC_MG*)pc->data;
1648   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1649   PetscReal       gc=0;
1650   PetscFunctionBegin;
1651   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1652   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1653   for (i=0;i<mg->nlevels; i++) {
1654     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1655   }
1656   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1657   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1658   if (pc_gamg->use_aggs_in_asm) {
1659     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1660   }
1661   if (pc_gamg->use_parallel_coarse_grid_solver) {
1662     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1663   }
1664 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1665   if (pc_gamg->cpu_pin_coarse_grids) {
1666     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1667   }
1668 #endif
1669   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1670   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1671   /* } else { */
1672   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1673   /* } */
1674   if (pc_gamg->ops->view) {
1675     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1676   }
1677   ierr = PCMGGetGridComplexity(pc,&gc);CHKERRQ(ierr);
1678   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g\n",gc);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1683 {
1684   PetscErrorCode ierr;
1685   PC_MG          *mg      = (PC_MG*)pc->data;
1686   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1687   PetscBool      flag,f2;
1688   MPI_Comm       comm;
1689   char           prefix[256],tname[32];
1690   PetscInt       i,n;
1691   const char     *pcpre;
1692   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1693   PetscFunctionBegin;
1694   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1695   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1696   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1697   if (flag) {
1698     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1699   }
1700   ierr = PetscOptionsFList("-pc_gamg_esteig_ksp_type","Krylov method for eigen estimator","PCGAMGSetEstEigKSPType",KSPList,pc_gamg->esteig_type,tname,sizeof(tname),&flag);CHKERRQ(ierr);
1701   if (flag) {
1702     ierr = PCGAMGSetEstEigKSPType(pc,tname);CHKERRQ(ierr);
1703   }
1704   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1705   f2 = PETSC_TRUE;
1706   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use eigen estimate from Smoothed aggregation for smoother","PCGAMGSetUseSAEstEig",f2,&f2,&flag);CHKERRQ(ierr);
1707   if (flag) pc_gamg->use_sa_esteig = f2 ? 1 : 0;
1708   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1709   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1710   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1711   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1712   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1713   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1714   ierr = PetscOptionsInt("-pc_gamg_esteig_ksp_max_it","Number of iterations of eigen estimator","PCGAMGSetEstEigKSPMaxIt",pc_gamg->esteig_max_it,&pc_gamg->esteig_max_it,NULL);CHKERRQ(ierr);
1715   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1716   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1717   n = PETSC_MG_MAXLEVELS;
1718   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1719   if (!flag || n < PETSC_MG_MAXLEVELS) {
1720     if (!flag) n = 1;
1721     i = n;
1722     do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_MG_MAXLEVELS);
1723   }
1724   n = PETSC_MG_MAXLEVELS;
1725   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (0 for first reduction puts one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1726   if (!flag) i = 0;
1727   else i = n;
1728   do {pc_gamg->level_reduction_factors[i] = -1;} while (++i<PETSC_MG_MAXLEVELS);
1729   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1730   {
1731     PetscReal eminmax[2] = {0., 0.};
1732     n = 2;
1733     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1734     if (flag) {
1735       if (n != 2) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1736       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr);
1737     }
1738   }
1739   /* set options for subtype */
1740   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1741 
1742   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1743   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1744   ierr = PetscOptionsTail();CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 /* -------------------------------------------------------------------------- */
1749 /*MC
1750      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1751 
1752    Options Database Keys:
1753 +   -pc_gamg_type <type> - one of agg, geo, or classical
1754 .   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom accross the coarse grids as they are determined
1755 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
1756 .   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coasening process to defined the subdomains on each level for the PCASM smoother
1757 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1758                                         equations on each process that has degrees of freedom
1759 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1760 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1761 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1762 
1763    Options Database Keys for default Aggregation:
1764 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1765 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1766 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1767 
1768    Multigrid options:
1769 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1770 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
1771 .  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kascade
1772 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1773 
1774 
1775   Notes:
1776     In order to obtain good performance for PCGAMG for vector valued problems you must
1777        Call MatSetBlockSize() to indicate the number of degrees of freedom per grid point
1778        Call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator
1779        See the Users Manual Chapter 4 for more details
1780 
1781   Level: intermediate
1782 
1783 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1784            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetEstEigKSPType()
1785 M*/
1786 
1787 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1788 {
1789   PetscErrorCode ierr,i;
1790   PC_GAMG        *pc_gamg;
1791   PC_MG          *mg;
1792 
1793   PetscFunctionBegin;
1794    /* register AMG type */
1795   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1796 
1797   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1798   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1799   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1800 
1801   /* create a supporting struct and attach it to pc */
1802   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1803   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1804   mg           = (PC_MG*)pc->data;
1805   mg->innerctx = pc_gamg;
1806 
1807   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1808 
1809   /* these should be in subctx but repartitioning needs simple arrays */
1810   pc_gamg->data_sz = 0;
1811   pc_gamg->data    = NULL;
1812 
1813   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1814   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1815   pc->ops->setup          = PCSetUp_GAMG;
1816   pc->ops->reset          = PCReset_GAMG;
1817   pc->ops->destroy        = PCDestroy_GAMG;
1818   mg->view                = PCView_GAMG;
1819 
1820   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1822   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1823   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1825   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPType_C",PCGAMGSetEstEigKSPType_GAMG);CHKERRQ(ierr);
1826   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPMaxIt_C",PCGAMGSetEstEigKSPMaxIt_GAMG);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1828   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1829   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1831   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1832   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1833   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1834   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1835   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1836   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1837   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1838   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1839   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1840   pc_gamg->repart           = PETSC_FALSE;
1841   pc_gamg->reuse_prol       = PETSC_FALSE;
1842   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1843   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1844   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1845   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1846   pc_gamg->min_eq_proc      = 50;
1847   pc_gamg->coarse_eq_limit  = 50;
1848   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1849   pc_gamg->threshold_scale = 1.;
1850   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1851   pc_gamg->current_level    = 0; /* don't need to init really */
1852   ierr = PetscStrcpy(pc_gamg->esteig_type,NULL);CHKERRQ(ierr);
1853   pc_gamg->esteig_max_it    = 10;
1854   pc_gamg->use_sa_esteig    = -1;
1855   pc_gamg->emin             = 0;
1856   pc_gamg->emax             = 0;
1857 
1858   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1859 
1860   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1861   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 /*@C
1866  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1867     from PCInitializePackage().
1868 
1869  Level: developer
1870 
1871  .seealso: PetscInitialize()
1872 @*/
1873 PetscErrorCode PCGAMGInitializePackage(void)
1874 {
1875   PetscErrorCode ierr;
1876 
1877   PetscFunctionBegin;
1878   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1879   PCGAMGPackageInitialized = PETSC_TRUE;
1880   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1881   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1882   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1883   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1884 
1885   /* general events */
1886   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", 0, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1887   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1888   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1889   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1890   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1891   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1892   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1893 
1894 #if defined PETSC_GAMG_USE_LOG
1895   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1896   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1897   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1898   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1899   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1900   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1901   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1902   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1903   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1904   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1905   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1906   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1907   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1908   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1909   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1910   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1911   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1912 
1913   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1914   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1915   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1916   /* create timer stages */
1917 #if defined GAMG_STAGES
1918   {
1919     char     str[32];
1920     PetscInt lidx;
1921     sprintf(str,"MG Level %d (finest)",0);
1922     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1923     for (lidx=1; lidx<9; lidx++) {
1924       sprintf(str,"MG Level %d",(int)lidx);
1925       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1926     }
1927   }
1928 #endif
1929 #endif
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 /*@C
1934  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1935     called from PetscFinalize() automatically.
1936 
1937  Level: developer
1938 
1939  .seealso: PetscFinalize()
1940 @*/
1941 PetscErrorCode PCGAMGFinalizePackage(void)
1942 {
1943   PetscErrorCode ierr;
1944 
1945   PetscFunctionBegin;
1946   PCGAMGPackageInitialized = PETSC_FALSE;
1947   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 /*@C
1952  PCGAMGRegister - Register a PCGAMG implementation.
1953 
1954  Input Parameters:
1955  + type - string that will be used as the name of the GAMG type.
1956  - create - function for creating the gamg context.
1957 
1958   Level: advanced
1959 
1960  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1961 @*/
1962 PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
1963 {
1964   PetscErrorCode ierr;
1965 
1966   PetscFunctionBegin;
1967   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1968   ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972