1 /*
2  GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /* Hack to access same_local_solves */
7 #include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/
8 
9 #if defined(PETSC_HAVE_CUDA)
10   #include <cuda_runtime.h>
11 #endif
12 
13 #if defined(PETSC_HAVE_HIP)
14   #include <hip/hip_runtime.h>
15 #endif
16 
17 #if defined PETSC_GAMG_USE_LOG
18 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
19 #endif
20 
21 #if defined PETSC_USE_LOG
22 PetscLogEvent PC_GAMGGraph_AGG;
23 PetscLogEvent PC_GAMGGraph_GEO;
24 PetscLogEvent PC_GAMGCoarsen_AGG;
25 PetscLogEvent PC_GAMGCoarsen_GEO;
26 PetscLogEvent PC_GAMGProlongator_AGG;
27 PetscLogEvent PC_GAMGProlongator_GEO;
28 PetscLogEvent PC_GAMGOptProlongator_AGG;
29 #endif
30 
31 /* #define GAMG_STAGES */
32 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
33 static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
34 #endif
35 
36 static PetscFunctionList GAMGList = NULL;
37 static PetscBool PCGAMGPackageInitialized;
38 
39 /* ----------------------------------------------------------------------------- */
40 PetscErrorCode PCReset_GAMG(PC pc)
41 {
42   PetscErrorCode ierr; PetscInt level;
43   PC_MG          *mg      = (PC_MG*)pc->data;
44   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
45 
46   PetscFunctionBegin;
47   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
48   pc_gamg->data_sz = 0;
49   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
50   for (level = 0; level < PETSC_MG_MAXLEVELS ; level++) {
51     mg->min_eigen_DinvA[level] = 0;
52     mg->max_eigen_DinvA[level] = 0;
53   }
54   pc_gamg->emin = 0;
55   pc_gamg->emax = 0;
56   PetscFunctionReturn(0);
57 }
58 
59 /* -------------------------------------------------------------------------- */
60 /*
61    PCGAMGCreateLevel_GAMG: create the coarse operator with RAP; repartition and/or reduce the
62      number of active processes.
63
64    Input Parameters:
65    . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
66           'pc_gamg->data_sz' are changed via repartitioning/reduction.
67    . Amat_fine - matrix on this fine (k) level
68    . cr_bs - coarse block size
69    Input/Output Parameters:
70    . a_P_inout - prolongation operator to the next level (k-->k-1)
71    . a_nactive_proc - number of active procs
72    Output Parameter:
73    . a_Amat_crs - coarse matrix that is created (k-1)
74 */
75 
76 static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
77 {
78   PetscErrorCode  ierr;
79   PC_MG           *mg         = (PC_MG*)pc->data;
80   PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
81   Mat             Cmat,Pold=*a_P_inout;
82   MPI_Comm        comm;
83   PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
84   PetscInt        ncrs_eq,ncrs,f_bs;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
88   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
89   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
90   ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
91   ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
92 
93   if (Pcolumnperm) *Pcolumnperm = NULL;
94 
95   /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
96   ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
97   if (pc_gamg->data_cell_rows>0) {
98     ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
99   } else {
100     PetscInt  bs;
101     ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
102     ncrs = ncrs_eq/bs;
103   }
104   /* get the number of PEs to make active, 'new_size'; the reduction can be to any integer in [1,P] */
105   if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* a reduction factor of 0 turns on reducing to one process per device; TODO: do the same for HIP, etc. */
106 #if defined(PETSC_HAVE_CUDA)
107     PetscShmComm pshmcomm;
108     PetscMPIInt  locrank;
109     MPI_Comm     loccomm;
110     PetscInt     s_nnodes,r_nnodes, new_new_size;
111     cudaError_t  cerr;
112     int          devCount;
113     ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
114     ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
115     ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRQ(ierr);
116     s_nnodes = !locrank;
117     ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
118     if (size%r_nnodes) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"number of processes np=%D not divisible by number of nodes nnodes=%D",size,r_nnodes);
119     devCount = 0;
120     cerr = cudaGetDeviceCount(&devCount);
121     cudaGetLastError(); /* Reset the last error */
122     if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
123       new_new_size = r_nnodes * devCount;
124       new_size = new_new_size;
125       ierr = PetscInfo5(pc,"Fine grid with Cuda. %D nodes. Change new active set size %D --> %D (devCount=%D #nodes=%D)\n",r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
126     } else {
127       ierr = PetscInfo(pc,"With CUDA but no device found. Using heuristic.\n");CHKERRQ(ierr);
128       goto HEURISTIC;
129     }
130 #else
131     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
132 #endif
133   } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
134     if (nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"number of active processes %D not divisible by reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
135     new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
136     ierr = PetscInfo3(pc,"Manually setting reduction to %D active processes (%D/%D)\n",new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
137   } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
138     new_size = 1;
139     ierr = PetscInfo1(pc,"Force coarsest grid reduction to %D active processes\n",new_size);CHKERRQ(ierr);
140   } else {
141     PetscInt ncrs_eq_glob;
142 #if defined(PETSC_HAVE_CUDA)
143     HEURISTIC:
144 #endif
145     ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
146     new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
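    /* illustrative example (hypothetical sizes): min_eq_proc=1000 and ncrs_eq_glob=24500 gives new_size=25 */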
147     if (!new_size) new_size = 1; /* not likely, but possible? */
148     else if (new_size >= nactive) new_size = nactive; /* no change, rare */
149     ierr = PetscInfo1(pc,"Coarse grid reduction to %D active processes\n",new_size);CHKERRQ(ierr);
150   }
151   if (new_size==nactive) {
152     *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
153     if (new_size < size) {
154       /* odd case where multiple coarse grids are on one processor or no coarsening ... */
155       ierr = PetscInfo1(pc,"reduced grid using same number of processors (%D) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
156       if (pc_gamg->cpu_pin_coarse_grids) {
157         ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
158         ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
159       }
160     }
161     /* we know that the grid structure can be reused in MatPtAP */
162   } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
163     PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
164     IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
165     nloc_old = ncrs_eq/cr_bs;
166     if (ncrs_eq % cr_bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
167     /* get new_size and rfactor */
168     if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
169       /* find factor */
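      /* illustrative example (hypothetical sizes): size=12 and new_size=5: the divisors kk of 12 give
         nactpe = 12/kk = 12,6,4,3,2,1; the ratio of nactpe to new_size is closest to 1 for kk=2
         (6 active PEs), so rfactor=2 and new_size becomes 12/2=6 */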
170       if (new_size == 1) rfactor = size; /* don't modify */
171       else {
172         PetscReal best_fact = 0.;
173         jj = -1;
174         for (kk = 1 ; kk <= size ; kk++) {
175           if (!(size%kk)) { /* a candidate */
176             PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
177             if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
178             if (fact > best_fact) {
179               best_fact = fact; jj = kk;
180             }
181           }
182         }
183         if (jj != -1) rfactor = jj;
184         else rfactor = 1; /* a prime */
185         if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
186         else expand_factor = rfactor;
187       }
188       new_size = size/rfactor; /* make new size one that is factor */
189       if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
190         *a_Amat_crs = Cmat;
191         ierr = PetscInfo2(pc,"Finding factorable processor set stopped reduction: new_size=%D, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
192         PetscFunctionReturn(0);
193       }
194     }
195 #if defined PETSC_GAMG_USE_LOG
196     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
197 #endif
198     /* make 'is_eq_newproc' */
199     ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
200     if (pc_gamg->repart) {
201       /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
202       Mat      adj;
203       ierr = PetscInfo4(pc,"Repartition: size (active): %D --> %D, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
204       /* get 'adj' */
205       if (cr_bs == 1) {
206         ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
207       } else {
208         /* make a scalar matrix to partition (no Stokes here) */
209         Mat               tMat;
210         PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
211         const PetscScalar *vals;
212         const PetscInt    *idx;
213         PetscInt          *d_nnz, *o_nnz, M, N;
214         static PetscInt   llev = 0; /* ugly but just used for debugging */
215         MatType           mtype;
216 
217         ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
218         ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
219         ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
220         for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
221           ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
222           d_nnz[jj] = ncols/cr_bs;
223           o_nnz[jj] = ncols/cr_bs;
224           ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
225           if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
226           if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
227         }
228 
229         ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
230         ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
231         ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
232         ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
233         ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
234         ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
235         ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
236 
237         for (ii = Istart_crs; ii < Iend_crs; ii++) {
238           PetscInt dest_row = ii/cr_bs;
239           ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
240           for (jj = 0; jj < ncols; jj++) {
241             PetscInt    dest_col = idx[jj]/cr_bs;
242             PetscScalar v        = 1.0;
243             ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
244           }
245           ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
246         }
247         ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248         ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
249 
250         if (llev++ == -1) {
251           PetscViewer viewer; char fname[32];
252           ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
253           PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);
254           ierr = MatView(tMat, viewer);CHKERRQ(ierr);
255           ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
256         }
257         ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
258         ierr = MatDestroy(&tMat);CHKERRQ(ierr);
259       } /* create 'adj' */
260 
261       { /* partition: get newproc_idx */
262         char            prefix[256];
263         const char      *pcpre;
264         const PetscInt  *is_idx;
265         MatPartitioning mpart;
266         IS              proc_is;
267 
268         ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
269         ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
270         ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
271         ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
272         ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
273         ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
274         ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
275         ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
276         ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
277 
278         /* collect IS info */
279         ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
280         ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
281         for (kk = jj = 0 ; kk < nloc_old ; kk++) {
282           for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
283             newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
284           }
285         }
286         ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
287         ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
288       }
289       ierr = MatDestroy(&adj);CHKERRQ(ierr);
290 
291       ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
292       ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
293     } else { /* simple aggregation of parts -- 'is_eq_newproc' */
294       PetscInt targetPE;
295       if (new_size==nactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
296       ierr = PetscInfo1(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
297       targetPE = (rank/rfactor)*expand_factor;
298       ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
299     } /* end simple 'is_eq_newproc' */
300 
301     /*
302       Create an index set from the is_eq_newproc index set to indicate the mapping TO
303     */
304     ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
305     is_eq_num_prim = is_eq_num;
306     /*
307       Determine how many equations/vertices are assigned to each processor
308     */
309     ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
310     ncrs_eq_new = counts[rank];
311     ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
312     ncrs_new = ncrs_eq_new/cr_bs;
313 
314     ierr = PetscFree(counts);CHKERRQ(ierr);
315     /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxiliary data into some complex abstracted thing */
316     {
317       Vec            src_crd, dest_crd;
318       const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
319       VecScatter     vecscat;
320       PetscScalar    *array;
321       IS isscat;
322       /* move data (for primal equations only) */
323       /* Create a vector to contain the newly ordered element information */
324       ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
325       ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
326       ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
327       /*
328         There are 'ndata_rows*ndata_cols' data items per node (one can think of the vectors as having
329         a block size of 'node_data_sz').  Note, ISs are expanded into equation space by 'cr_bs'.
330       */
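      /* illustrative example (hypothetical sizes): with cr_bs=2 and node_data_sz=3, coarse node ii
         recovers its new node id as idx[2*ii]/2 and its data items map to tidx entries
         id*3, id*3+1, id*3+2 */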
331       ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
332       ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
333       for (ii=0,jj=0; ii<ncrs; ii++) {
334         PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
335         for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
336       }
337       ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
338       ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
339       ierr = PetscFree(tidx);CHKERRQ(ierr);
340       /*
341         Create a vector to contain the original vertex information for each element
342       */
343       ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
344       for (jj=0; jj<ndata_cols; jj++) {
345         const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
346         for (ii=0; ii<ncrs; ii++) {
347           for (kk=0; kk<ndata_rows; kk++) {
348             PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
349             PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
350             ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
351           }
352         }
353       }
354       ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
355       ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
356       /*
357         Scatter the element vertex information (still in the original vertex ordering)
358         to the correct processor
359       */
360       ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
361       ierr = ISDestroy(&isscat);CHKERRQ(ierr);
362       ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
363       ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
364       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
365       ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
366       /*
367         Put the element vertex data into a new allocation of the gdata->ele
368       */
369       ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
370       ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);
371 
372       pc_gamg->data_sz = node_data_sz*ncrs_new;
373       strideNew        = ncrs_new*ndata_rows;
374 
375       ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
376       for (jj=0; jj<ndata_cols; jj++) {
377         for (ii=0; ii<ncrs_new; ii++) {
378           for (kk=0; kk<ndata_rows; kk++) {
379             PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
380             pc_gamg->data[ix] = PetscRealPart(array[jx]);
381           }
382         }
383       }
384       ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
385       ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
386     }
387     /* move A and P (columns) with new layout */
388 #if defined PETSC_GAMG_USE_LOG
389     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
390 #endif
391     /*
392       Invert for MatCreateSubMatrix
393     */
394     ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
395     ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
396     ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
397     if (is_eq_num != is_eq_num_prim) {
398       ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
399     }
400     if (Pcolumnperm) {
401       ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
402       *Pcolumnperm = new_eq_indices;
403     }
404     ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
405 #if defined PETSC_GAMG_USE_LOG
406     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
407     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
408 #endif
409     /* 'a_Amat_crs' output */
410     {
411       Mat mat;
412       ierr        = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
413       *a_Amat_crs = mat;
414     }
415     ierr = MatDestroy(&Cmat);CHKERRQ(ierr);
416 
417 #if defined PETSC_GAMG_USE_LOG
418     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
419 #endif
420     /* prolongator */
421     {
422       IS       findices;
423       PetscInt Istart,Iend;
424       Mat      Pnew;
425 
426       ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
427 #if defined PETSC_GAMG_USE_LOG
428       ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
429 #endif
430       ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
431       ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
432       ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
433       ierr = ISDestroy(&findices);CHKERRQ(ierr);
434 
435 #if defined PETSC_GAMG_USE_LOG
436       ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
437 #endif
438       ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);
439 
440       /* output - repartitioned */
441       *a_P_inout = Pnew;
442     }
443     ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);
444 
445     *a_nactive_proc = new_size; /* output */
446 
447     /* pin reduced grids to the CPU: not a bad heuristic, and this optimization gets folded into the process-reduction optimization */
448     if (pc_gamg->cpu_pin_coarse_grids) {
449 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
450       static PetscInt llev = 2;
451       ierr = PetscInfo1(pc,"Pinning level %D to the CPU\n",llev++);CHKERRQ(ierr);
452 #endif
453       ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
454       ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
455       if (1) { /* lvec is created in MatSetUpMultiply_MPIAIJ, so we need to pin it here. Hack */
456         Mat         A = *a_Amat_crs, P = *a_P_inout;
457         PetscMPIInt size;
458         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
459         if (size > 1) {
460           Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
461           ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
462           ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
463         }
464       }
465     }
466 #if defined PETSC_GAMG_USE_LOG
467     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
468 #endif
469   }
470   PetscFunctionReturn(0);
471 }
472 
473 PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
474 {
475   PetscErrorCode ierr;
476   const char     *prefix;
477   char           addp[32];
478   PC_MG          *mg      = (PC_MG*)a_pc->data;
479   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
480 
481   PetscFunctionBegin;
482   ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
483   ierr = PetscInfo1(a_pc,"Square Graph on level %D\n",pc_gamg->current_level+1);CHKERRQ(ierr);
484   ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
485   ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
486   ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",(int)pc_gamg->current_level);CHKERRQ(ierr);
487   ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
488   ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
489   ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
490   ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
491   /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
492   (*Gmat2)->assembled = PETSC_TRUE;
493   PetscFunctionReturn(0);
494 }
495 
496 /* -------------------------------------------------------------------------- */
497 /*
498    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
499                     by setting data structures and options.
500 
501    Input Parameter:
502 .  pc - the preconditioner context
503 
504 */
505 PetscErrorCode PCSetUp_GAMG(PC pc)
506 {
507   PetscErrorCode ierr;
508   PC_MG          *mg      = (PC_MG*)pc->data;
509   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
510   Mat            Pmat     = pc->pmat;
511   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
512   MPI_Comm       comm;
513   PetscMPIInt    rank,size,nactivepe;
514   Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
515   IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
516   PetscLogDouble nnz0=0.,nnztot=0.;
517   MatInfo        info;
518   PetscBool      is_last = PETSC_FALSE;
519 
520   PetscFunctionBegin;
521   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
522   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
523   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
524 
525   if (pc_gamg->setup_count++ > 0) {
526     if ((PetscBool)(!pc_gamg->reuse_prol)) {
527       /* reset everything */
528       ierr = PCReset_MG(pc);CHKERRQ(ierr);
529       pc->setupcalled = 0;
530     } else {
531       PC_MG_Levels **mglevels = mg->levels;
532       /* just do Galerkin grids */
533       Mat          B,dA,dB;
534 
535       if (!pc->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCSetUp() has not been called yet");
536       if (pc_gamg->Nlevels > 1) {
537         /* currently only handle case where mat and pmat are the same on coarser levels */
538         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
539         /* (re)set to get dirty flag */
540         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
541 
542         for (level=pc_gamg->Nlevels-2; level>=0; level--) {
543           /* 2nd solve: the matrix structure can change from repartitioning or process reduction, but we don't know if we have process reduction here. Should fix */
544           if (pc_gamg->setup_count==2 /* && pc_gamg->repart||reduction */) {
545             ierr = PetscInfo2(pc,"new RAP after first solve level %D, %D setup\n",level,pc_gamg->setup_count);CHKERRQ(ierr);
546             ierr = MatPtAP(dB,mglevels[level+1]->interpolate,MAT_INITIAL_MATRIX,2.0,&B);CHKERRQ(ierr);
547             ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr);
548             mglevels[level]->A = B;
549           } else {
550             ierr = PetscInfo2(pc,"RAP after first solve reusing matrix level %D, %D setup\n",level,pc_gamg->setup_count);CHKERRQ(ierr);
551             ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
552             ierr = MatPtAP(dB,mglevels[level+1]->interpolate,MAT_REUSE_MATRIX,1.0,&B);CHKERRQ(ierr);
553           }
554           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
555           dB   = B;
556         }
557       }
558 
559       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
560       PetscFunctionReturn(0);
561     }
562   }
563 
564   if (!pc_gamg->data) {
565     if (pc_gamg->orig_data) {
566       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
567       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
568 
569       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
570       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
571       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
572 
573       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
574       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
575     } else {
576       if (!pc_gamg->ops->createdefaultdata) SETERRQ(comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
577       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
578     }
579   }
580 
581   /* cache original data for reuse */
582   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
583     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
584     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
585     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
586     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
587   }
588 
589   /* get basic dims */
590   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
591   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
592 
593   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
594   nnz0   = info.nz_used;
595   nnztot = info.nz_used;
596   ierr = PetscInfo6(pc,"level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
597 
598   /* Get A_i and R_i */
599   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
600     pc_gamg->current_level = level;
601     if (level >= PETSC_MG_MAXLEVELS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
602     level1 = level + 1;
603 #if defined PETSC_GAMG_USE_LOG
604     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
605 #if (defined GAMG_STAGES)
606     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
607 #endif
608 #endif
609     { /* construct prolongator */
610       Mat              Gmat;
611       PetscCoarsenData *agg_lists;
612       Mat              Prol11;
613 
614       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
615       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
616       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
617 
618       /* could have failed to create new level */
619       if (Prol11) {
620         const char *prefix;
621         char       addp[32];
622 
623         /* get new block size of coarse matrices */
624         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
625 
626         if (pc_gamg->ops->optprolongator) {
627           /* smooth */
628           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
629         }
630 
631         if (pc_gamg->use_aggs_in_asm) {
632           PetscInt bs;
633           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
634           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
635         }
636 
637         ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
638         ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
639         ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
640         ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
641         /* if we reuse the prolongator, then it is better to generate the transpose by default with CUDA
642            Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
643 #if defined(PETSC_HAVE_CUDA)
644         {
645           PetscBool ismpiaij;
646 
647           ierr = PetscObjectBaseTypeCompare((PetscObject)Prol11,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr);
648           if (ismpiaij) {
649             Mat Prol_d,Prol_o;
650 
651             ierr = MatMPIAIJGetSeqAIJ(Prol11,&Prol_d,&Prol_o,NULL);CHKERRQ(ierr);
652             ierr = MatSeqAIJCUSPARSESetGenerateTranspose(Prol_d,pc_gamg->reuse_prol);CHKERRQ(ierr);
653             ierr = MatSeqAIJCUSPARSESetGenerateTranspose(Prol_o,pc_gamg->reuse_prol);CHKERRQ(ierr);
654           } else {
655             ierr = MatSeqAIJCUSPARSESetGenerateTranspose(Prol11,pc_gamg->reuse_prol);CHKERRQ(ierr);
656           }
657         }
658 #endif
659         ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
660         Parr[level1] = Prol11;
661       } else Parr[level1] = NULL; /* failed to coarsen */
662 
663       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
664       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
665     } /* construct prolongator scope */
666 #if defined PETSC_GAMG_USE_LOG
667     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
668 #endif
669     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
670     if (!Parr[level1]) { /* failed to coarsen */
671       ierr =  PetscInfo1(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
672 #if defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES
673       ierr = PetscLogStagePop();CHKERRQ(ierr);
674 #endif
675       break;
676     }
677 #if defined PETSC_GAMG_USE_LOG
678     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
679 #endif
680     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is next M, a loop test variable */
681     if (is_last) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Is last ?");
682     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
683     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
684     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
685 
686 #if defined PETSC_GAMG_USE_LOG
687     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
688 #endif
689     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is the loop test variable */
690     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
691     nnztot += info.nz_used;
692     ierr = PetscInfo5(pc,"%D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
693 
694 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
695     ierr = PetscLogStagePop();CHKERRQ(ierr);
696 #endif
697     /* stop if one node or one proc -- could pull back for singular problems */
698     if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
699       ierr =  PetscInfo2(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
700       level++;
701       break;
702     }
703   } /* levels */
704   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
705 
706   ierr = PetscInfo2(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
707   pc_gamg->Nlevels = level + 1;
708   fine_level       = level;
709   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
710 
711   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
712     PetscErrorCode (*savesetfromoptions[PETSC_MG_MAXLEVELS])(PetscOptionItems*,KSP);
713 
714     /* set default smoothers & set operators */
715     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
716       KSP smoother;
717       PC  subpc;
718 
719       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
720       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
721 
722       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
723       /* set ops */
724       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
725       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
726 
727       /* set defaults */
728       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
729 
730       /* set blocks for ASM smoother that uses the 'aggregates' */
731       if (pc_gamg->use_aggs_in_asm) {
732         PetscInt sz;
733         IS       *iss;
734 
735         sz   = nASMBlocksArr[level];
736         iss  = ASMLocalIDsArr[level];
737         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
738         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
739         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
740         if (!sz) {
741           IS       is;
742           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
743           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
744           ierr = ISDestroy(&is);CHKERRQ(ierr);
745         } else {
746           PetscInt kk;
747           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
748           for (kk=0; kk<sz; kk++) {
749             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
750           }
751           ierr = PetscFree(iss);CHKERRQ(ierr);
752         }
753         ASMLocalIDsArr[level] = NULL;
754         nASMBlocksArr[level]  = 0;
755       } else {
756         ierr = PCSetType(subpc, PCSOR);CHKERRQ(ierr);
757       }
758     }
759     {
760       /* coarse grid */
761       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
762       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
763 
764       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
765       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
766       if (!pc_gamg->use_parallel_coarse_grid_solver) {
767         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
768         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
769         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
770         ierr = PCSetUp(subpc);CHKERRQ(ierr);
771         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
772         if (ii != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
773         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
774         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
775         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
776         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
777         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
778         /* This flag gets reset by PCBJacobiGetSubKSP(), but our BJacobi really does the same algorithm everywhere (and in
779          * fact, all but one process will have zero dofs), so we reset the flag to avoid having PCView_BJacobi attempt to
780          * view every subdomain as though they were different. */
781         ((PC_BJacobi*)subpc->data)->same_local_solves = PETSC_TRUE;
782       }
783     }
784 
785     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
786     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
787     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
788     ierr = PetscOptionsEnd();CHKERRQ(ierr);
789     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
790 
791     /* setup cheby eigen estimates from SA */
792     if (pc_gamg->use_sa_esteig==1) {
793       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
794         KSP       smoother;
795         PetscBool ischeb;
796 
797         savesetfromoptions[level] = NULL;
798         ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
799         ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
800         if (ischeb) {
801           KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;
802 
803           ierr = KSPSetFromOptions(smoother);CHKERRQ(ierr); /* let command line emax override using SA's eigenvalues */
804           if (mg->max_eigen_DinvA[level] > 0 && cheb->emax == 0.) {
805             PC        subpc;
806             PetscBool isjac;
807             ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
808             ierr = PetscObjectTypeCompare((PetscObject)subpc,PCJACOBI,&isjac);CHKERRQ(ierr);
809             if (isjac && pc_gamg->use_sa_esteig==1) {
810               PetscReal emax,emin;
811 
812               emin = mg->min_eigen_DinvA[level];
813               emax = mg->max_eigen_DinvA[level];
814               ierr = PetscInfo4(pc,"PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
815               cheb->emin_computed = emin;
816               cheb->emax_computed = emax;
817               ierr = KSPChebyshevSetEigenvalues(smoother, cheb->tform[2]*emin + cheb->tform[3]*emax, cheb->tform[0]*emin + cheb->tform[1]*emax);CHKERRQ(ierr);
818 
819               /* We have set the eigenvalues and consumed the transformation values
820                  prevent from flagging the recomputation of the eigenvalues again in PCSetUp_MG
821                  below when setfromoptions will be called again */
822               savesetfromoptions[level] = smoother->ops->setfromoptions;
823               smoother->ops->setfromoptions = NULL;
824             }
825           }
826         }
827       }
828     }
829 
830     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
831 
832     /* restore Chebyshev smoother for next calls */
833     if (pc_gamg->use_sa_esteig==1) {
834       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
835         if (savesetfromoptions[level]) {
836           KSP smoother;
837           ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
838           smoother->ops->setfromoptions = savesetfromoptions[level];
839         }
840       }
841     }
842 
843     /* clean up */
844     for (level=1; level<pc_gamg->Nlevels; level++) {
845       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
846       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
847     }
848   } else {
849     KSP smoother;
850 
851     ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
852     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
853     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
854     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
855     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
856   }
857   PetscFunctionReturn(0);
858 }
859 
860 /* ------------------------------------------------------------------------- */
861 /*
862  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
863    that was created with PCCreate_GAMG().
864 
865    Input Parameter:
866 .  pc - the preconditioner context
867 
868    Application Interface Routine: PCDestroy()
869 */
870 PetscErrorCode PCDestroy_GAMG(PC pc)
871 {
872   PetscErrorCode ierr;
873   PC_MG          *mg     = (PC_MG*)pc->data;
874   PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;
875 
876   PetscFunctionBegin;
877   ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
878   if (pc_gamg->ops->destroy) {
879     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
880   }
881   ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
882   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
883   ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
884   ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
885   PetscFunctionReturn(0);
886 }
887 
888 /*@
889    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
890 
891    Logically Collective on PC
892 
893    Input Parameters:
894 +  pc - the preconditioner context
895 -  n - the number of equations
896 
897 
898    Options Database Key:
899 .  -pc_gamg_process_eq_limit <limit>
900 
901    Notes:
902     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
903           that has degrees of freedom
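
    A minimal usage sketch (hypothetical solver setup; error checking omitted):
.vb
       PC pc;
       KSPGetPC(ksp, &pc);
       PCSetType(pc, PCGAMG);
       PCGAMGSetProcEqLim(pc, 1000);  /* aim for roughly 1000 equations per process on coarse grids */
.ve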
904 
905    Level: intermediate
906 
907 .seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
908 @*/
909 PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
910 {
911   PetscErrorCode ierr;
912 
913   PetscFunctionBegin;
914   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
915   ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
920 {
921   PC_MG   *mg      = (PC_MG*)pc->data;
922   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
923 
924   PetscFunctionBegin;
925   if (n>0) pc_gamg->min_eq_proc = n;
926   PetscFunctionReturn(0);
927 }
928 
929 /*@
930    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
931 
932    Collective on PC
933 
934    Input Parameters:
935 +  pc - the preconditioner context
936 -  n - maximum number of equations to aim for
937 
938    Options Database Key:
939 .  -pc_gamg_coarse_eq_limit <limit>
940 
941    Notes: For example, -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
942      has fewer than 1000 unknowns.
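
    A usage sketch (hypothetical; error checking omitted):
.vb
       PCGAMGSetCoarseEqLim(pc, 1000);  /* stop coarsening once the coarse grid has < 1000 equations */
.ve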
943 
944    Level: intermediate
945 
946 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
947 @*/
948 PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
949 {
950   PetscErrorCode ierr;
951 
952   PetscFunctionBegin;
953   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
954   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
959 {
960   PC_MG   *mg      = (PC_MG*)pc->data;
961   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
962 
963   PetscFunctionBegin;
964   if (n>0) pc_gamg->coarse_eq_limit = n;
965   PetscFunctionReturn(0);
966 }
967 
968 /*@
969    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
970 
971    Collective on PC
972 
973    Input Parameters:
974 +  pc - the preconditioner context
975 -  n - PETSC_TRUE or PETSC_FALSE
976 
977    Options Database Key:
978 .  -pc_gamg_repartition <true,false>
979 
980    Notes:
981     this will generally improve the load balancing of the work on each level
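
    A usage sketch (hypothetical; repartitioning is most useful with a parallel partitioner such as ParMETIS):
.vb
       PCGAMGSetRepartition(pc, PETSC_TRUE);  /* rebalance the coarse grids across processes */
.ve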
982 
983    Level: intermediate
984 
985 .seealso: ()
986 @*/
987 PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
988 {
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
993   ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
998 {
999   PC_MG   *mg      = (PC_MG*)pc->data;
1000   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1001 
1002   PetscFunctionBegin;
1003   pc_gamg->repart = n;
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 /*@
1008    PCGAMGSetEstEigKSPMaxIt - Set number of KSP iterations in eigen estimator (for Cheby)
1009 
1010    Collective on PC
1011 
1012    Input Parameters:
1013 +  pc - the preconditioner context
1014 -  n - number of iterations
1015 
1016    Options Database Key:
1017 .  -pc_gamg_esteig_ksp_max_it <its>
1018 
1019    Notes:
1020     This is the number of iterations of the Krylov method used to estimate the extreme eigenvalues for the Chebyshev smoother.
1021    Level: intermediate
1022 
1023 .seealso: ()
1024 @*/
1025 PetscErrorCode PCGAMGSetEstEigKSPMaxIt(PC pc, PetscInt n)
1026 {
1027   PetscErrorCode ierr;
1028 
1029   PetscFunctionBegin;
1030   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1031   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPMaxIt_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 static PetscErrorCode PCGAMGSetEstEigKSPMaxIt_GAMG(PC pc, PetscInt n)
1036 {
1037   PC_MG   *mg      = (PC_MG*)pc->data;
1038   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1039 
1040   PetscFunctionBegin;
1041   pc_gamg->esteig_max_it = n;
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 /*@
1046    PCGAMGSetUseSAEstEig - Use eigen estimate from smoothed aggregation for Cheby smoother
1047 
1048    Collective on PC
1049 
1050    Input Parameters:
1051 +  pc - the preconditioner context
1052 -  n - PETSC_TRUE to use the eigen estimate from smoothed aggregation
1053 
1054    Options Database Key:
1055 .  -pc_gamg_use_sa_esteig <true,false>
1056 
1057    Notes:
1058     Smoothed aggregation computes an estimate of the largest eigenvalue of D^{-1}A while smoothing the prolongator; reusing it to seed the Chebyshev smoother avoids a separate eigen-estimation solve on each level.
1059    Level: intermediate
1060 
1061 .seealso: ()
1062 @*/
1063 PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
1064 {
1065   PetscErrorCode ierr;
1066 
1067   PetscFunctionBegin;
1068   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1069   ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
1074 {
1075   PC_MG   *mg      = (PC_MG*)pc->data;
1076   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1077 
1078   PetscFunctionBegin;
1079   pc_gamg->use_sa_esteig = n ? 1 : 0;
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 /*@C
1084    PCGAMGSetEstEigKSPType - Set type of KSP in eigen estimator (for Cheby)
1085 
1086    Collective on PC
1087 
1088    Input Parameters:
1089 +  pc - the preconditioner context
1090 -  t - ksp type
1091 
1092    Options Database Key:
1093 .  -pc_gamg_esteig_ksp_type <type>
1094 
1095    Notes:
1096     For symmetric positive definite systems, CG (-pc_gamg_esteig_ksp_type cg) usually gives a better eigenvalue estimate than the default.
1097    Level: intermediate
1098 
1099 .seealso: ()
1100 @*/
1101 PetscErrorCode PCGAMGSetEstEigKSPType(PC pc, char t[])
1102 {
1103   PetscErrorCode ierr;
1104 
1105   PetscFunctionBegin;
1106   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1107   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPType_C",(PC,char[]),(pc,t));CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 static PetscErrorCode PCGAMGSetEstEigKSPType_GAMG(PC pc, char t[])
1112 {
1113   PetscErrorCode ierr;
1114   PC_MG   *mg      = (PC_MG*)pc->data;
1115   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1116 
1117   PetscFunctionBegin;
1118   ierr = PetscStrcpy(pc_gamg->esteig_type,t);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 /*@
1123    PCGAMGSetEigenvalues - Set the eigenvalue estimates for GAMG to use, rather than computing them internally
1124 
1125    Collective on PC
1126 
1127    Input Parameters:
1128 +  pc - the preconditioner context
1129 .  emax - max eigenvalue
1130 -  emin - min eigenvalue
1131 
1132    Options Database Key:
1133 .  -pc_gamg_eigenvalues
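
   Notes:
    A usage sketch (hypothetical eigenvalue bounds; error checking omitted):
.vb
       PCGAMGSetUseSAEstEig(pc, PETSC_FALSE);  /* do not use the smoothed aggregation estimate */
       PCGAMGSetEigenvalues(pc, 2.0, 0.1);     /* supply emax and emin directly */
.ve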
1134 
1135    Level: intermediate
1136 
1137 .seealso: PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPType()
1138 @*/
1139 PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
1140 {
1141   PetscErrorCode ierr;
1142 
1143   PetscFunctionBegin;
1144   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1145   ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
1149 {
1150   PC_MG          *mg      = (PC_MG*)pc->data;
1151   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1152 
1153   PetscFunctionBegin;
1154   if (emax <= emin) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
1155   if (emax*emin <= 0.0) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
1156   pc_gamg->emax = emax;
1157   pc_gamg->emin = emin;
1158 
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 /*@
1163    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
1164 
1165    Collective on PC
1166 
1167    Input Parameters:
1168 +  pc - the preconditioner context
1169 -  n - PETSC_TRUE or PETSC_FALSE
1170 
1171    Options Database Key:
1172 .  -pc_gamg_reuse_interpolation <true,false>
1173 
1174    Level: intermediate
1175 
1176    Notes:
1177     this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but it allows
1178           the preconditioner to be rebuilt more quickly.
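
    A usage sketch (hypothetical; useful when solving a sequence of systems with the same sparsity pattern):
.vb
       PCGAMGSetReuseInterpolation(pc, PETSC_TRUE);  /* keep the prolongators across PCSetUp() calls */
.ve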
1179 
1180 .seealso: ()
1181 @*/
1182 PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
1183 {
1184   PetscErrorCode ierr;
1185 
1186   PetscFunctionBegin;
1187   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1188   ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
1193 {
1194   PC_MG   *mg      = (PC_MG*)pc->data;
1195   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1196 
1197   PetscFunctionBegin;
1198   pc_gamg->reuse_prol = n;
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 /*@
1203    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
1204 
1205    Collective on PC
1206 
1207    Input Parameters:
1208 +  pc - the preconditioner context
1209 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
1210 
1211    Options Database Key:
1212 .  -pc_gamg_asm_use_agg
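
   Notes:
    A usage sketch (hypothetical; the ASM smoother subdomains become the coarsening aggregates):
.vb
       PCGAMGASMSetUseAggs(pc, PETSC_TRUE);
.ve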
1213 
1214    Level: intermediate
1215 
1216 .seealso: ()
1217 @*/
1218 PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
1219 {
1220   PetscErrorCode ierr;
1221 
1222   PetscFunctionBegin;
1223   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1224   ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
1229 {
1230   PC_MG   *mg      = (PC_MG*)pc->data;
1231   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1232 
1233   PetscFunctionBegin;
1234   pc_gamg->use_aggs_in_asm = flg;
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 /*@
1239    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
1240 
1241    Collective on PC
1242 
1243    Input Parameters:
1244 +  pc - the preconditioner context
1245 -  flg - PETSC_TRUE to allow the coarse grid solve to run in parallel (not forced onto one process)
1246 
1247    Options Database Key:
1248 .  -pc_gamg_use_parallel_coarse_grid_solver
1249 
1250    Level: intermediate
1251 
1252 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
1253 @*/
1254 PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
1255 {
1256   PetscErrorCode ierr;
1257 
1258   PetscFunctionBegin;
1259   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1260   ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
1265 {
1266   PC_MG   *mg      = (PC_MG*)pc->data;
1267   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1268 
1269   PetscFunctionBegin;
1270   pc_gamg->use_parallel_coarse_grid_solver = flg;
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 /*@
1275    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
1276 
1277    Collective on PC
1278 
1279    Input Parameters:
1280 +  pc - the preconditioner context
1281 -  flg - PETSC_TRUE to pin coarse grids to CPU
1282 
1283    Options Database Key:
1284 .  -pc_gamg_cpu_pin_coarse_grids
1285 
1286    Level: intermediate
1287 
1288 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
1289 @*/
1290 PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
1291 {
1292   PetscErrorCode ierr;
1293 
1294   PetscFunctionBegin;
1295   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1296   ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
1301 {
1302   PC_MG   *mg      = (PC_MG*)pc->data;
1303   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1304 
1305   PetscFunctionBegin;
1306   pc_gamg->cpu_pin_coarse_grids = flg;
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 /*@
1311    PCGAMGSetCoarseGridLayoutType - place reduced grids on processes in natural order (compact type) or spread evenly (spread type)
1312 
1313    Collective on PC
1314 
1315    Input Parameters:
1316 +  pc - the preconditioner context
1317 -  flg - Layout type
1318 
1319    Options Database Key:
1320 .  -pc_gamg_coarse_grid_layout_type
1321 
1322    Level: intermediate
1323 
1324 .seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
1325 @*/
1326 PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
1327 {
1328   PetscErrorCode ierr;
1329 
1330   PetscFunctionBegin;
1331   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1332   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
1337 {
1338   PC_MG   *mg      = (PC_MG*)pc->data;
1339   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1340 
1341   PetscFunctionBegin;
1342   pc_gamg->layout_type = flg;
1343   PetscFunctionReturn(0);
1344 }
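
/*
   A hedged sketch combining the three coarse-grid placement controls above
   (PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids(), and
   PCGAMGSetCoarseGridLayoutType()); this particular combination is
   illustrative, not a recommended configuration.

      ierr = PCGAMGSetUseParallelCoarseGridSolve(pc,PETSC_TRUE);CHKERRQ(ierr);      keep the coarsest solve parallel
      ierr = PCGAMGSetCpuPinCoarseGrids(pc,PETSC_TRUE);CHKERRQ(ierr);               run reduced grids on the CPU
      ierr = PCGAMGSetCoarseGridLayoutType(pc,PCGAMG_LAYOUT_COMPACT);CHKERRQ(ierr);

   Equivalent options: -pc_gamg_use_parallel_coarse_grid_solver
   -pc_gamg_cpu_pin_coarse_grids -pc_gamg_coarse_grid_layout_type compact
*/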
1345 
1346 /*@
1347    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1348 
1349    Not collective on PC
1350 
1351    Input Parameters:
1352 +  pc - the preconditioner
1353 -  n - the maximum number of levels to use
1354 
1355    Options Database Key:
1356 .  -pc_mg_levels
1357 
1358    Level: intermediate
1359 
1360 .seealso: PCGAMG, PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1361 @*/
1362 PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
1363 {
1364   PetscErrorCode ierr;
1365 
1366   PetscFunctionBegin;
1367   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1368   ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1373 {
1374   PC_MG   *mg      = (PC_MG*)pc->data;
1375   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1376 
1377   PetscFunctionBegin;
1378   pc_gamg->Nlevels = n;
1379   PetscFunctionReturn(0);
1380 }
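
/*
   Sketch (the depth is illustrative): cap the hierarchy at four levels, e.g.
   to bound setup cost for modest problem sizes; 'pc' is an existing PCGAMG
   context.

      ierr = PCGAMGSetNlevels(pc,4);CHKERRQ(ierr);

   or equivalently -pc_mg_levels 4 from the options database.
*/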
1381 
1382 /*@
1383    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1384 
1385    Not collective on PC
1386 
1387    Input Parameters:
1388 +  pc - the preconditioner context
1389 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1390 -  n - number of threshold values provided in array
1391 
1392    Options Database Key:
1393 .  -pc_gamg_threshold <threshold>
1394 
1395    Notes:
1396     Increasing the threshold decreases the rate of coarsening. Conversely, reducing the threshold increases the rate of coarsening (aggressive coarsening), which reduces the complexity of the coarse grids but generally results in slower solver convergence rates. Reducing coarse grid complexity also reduces the cost of Galerkin coarse grid construction considerably.
1397     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, thus reducing the coupling in the graph and producing a different (perhaps better) coarse set of points.
1398 
1399     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1400     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1401     If n is greater than the total number of levels, the excess entries in threshold will not be used.
1402 
1403    Level: intermediate
1404 
1405 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1406 @*/
1407 PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
1408 {
1409   PetscErrorCode ierr;
1410 
1411   PetscFunctionBegin;
1412   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1413   if (n) PetscValidRealPointer(v,2);
1414   ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1419 {
1420   PC_MG   *mg      = (PC_MG*)pc->data;
1421   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1422   PetscInt i;
1423   PetscFunctionBegin;
1424   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1425   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = i ? pc_gamg->threshold[i-1]*pc_gamg->threshold_scale : 0.; /* guard i==0: with n==0 there is no previous entry to scale from */
1426   PetscFunctionReturn(0);
1427 }
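
/*
   Sketch of per-level drop tolerances (the values are illustrative only):
   specify thresholds for the three finest graphs; levels beyond those given
   are filled in via the threshold scale, as in PCGAMGSetThreshold_GAMG() above.

      PetscReal th[3] = {0.08, 0.04, 0.02};
      ierr = PCGAMGSetThreshold(pc,th,3);CHKERRQ(ierr);

   or equivalently -pc_gamg_threshold 0.08,0.04,0.02
*/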
1428 
1429 /*@
1430    PCGAMGSetRankReductionFactors - Set manual schedual for process reduction on coarse grids
1431    PCGAMGSetRankReductionFactors - Set a manual schedule for process reduction on coarse grids
1432    Collective on PC
1433 
1434    Input Parameters:
1435 +  pc - the preconditioner context
1436 .  v - array of reduction factors. 0 for fist value forces a reduction to one process/device on first level in Cuda
1437 -  n - number of values provided in array
1438 
1439    Options Database Key:
1440 .  -pc_gamg_rank_reduction_factors <factors>
1441 
1442    Level: intermediate
1443 
1444 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1445 @*/
1446 PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
1447 {
1448   PetscErrorCode ierr;
1449 
1450   PetscFunctionBegin;
1451   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1452   if (n) PetscValidIntPointer(v,2);
1453   ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
1458 {
1459   PC_MG   *mg      = (PC_MG*)pc->data;
1460   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1461   PetscInt i;
1462   PetscFunctionBegin;
1463   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
1464   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* -1 selects the default heuristic; 0 would force one process/device on the first level */
1465   PetscFunctionReturn(0);
1466 }
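
/*
   Sketch (the factors are illustrative): drop the active process count by 8x
   when building the first coarse grid and by 4x for the next; levels beyond
   those given fall back to the internal heuristic (factor -1).

      PetscInt rf[2] = {8, 4};
      ierr = PCGAMGSetRankReductionFactors(pc,rf,2);CHKERRQ(ierr);

   or equivalently -pc_gamg_rank_reduction_factors 8,4
*/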
1467 
1468 /*@
1469    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1470 
1471    Not collective on PC
1472 
1473    Input Parameters:
1474 +  pc - the preconditioner context
1475 -  v - the threshold value reduction, usually < 1.0
1476 
1477    Options Database Key:
1478 .  -pc_gamg_threshold_scale <v>
1479 
1480    Notes:
1481    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1482    This scaling is used for each subsequent coarsening; PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1483 
1484    Level: advanced
1485 
1486 .seealso: PCGAMGSetThreshold()
1487 @*/
1488 PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
1489 {
1490   PetscErrorCode ierr;
1491 
1492   PetscFunctionBegin;
1493   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1494   ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1499 {
1500   PC_MG   *mg      = (PC_MG*)pc->data;
1501   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1502   PetscFunctionBegin;
1503   pc_gamg->threshold_scale = v;
1504   PetscFunctionReturn(0);
1505 }
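
/*
   Worked example of the scaling rule above (the numbers are illustrative):
   with a finest-level threshold of 0.05 and a scale of 0.5, unspecified levels
   use 0.025, 0.0125, and so on. Note the required call order:

      PetscReal th0 = 0.05;
      ierr = PCGAMGSetThresholdScale(pc,0.5);CHKERRQ(ierr);   the scale first
      ierr = PCGAMGSetThreshold(pc,&th0,1);CHKERRQ(ierr);     then the threshold(s)
*/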
1506 
1507 /*@C
1508    PCGAMGSetType - Set solution method
1509 
1510    Collective on PC
1511 
1512    Input Parameters:
1513 +  pc - the preconditioner context
1514 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1515 
1516    Options Database Key:
1517 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1518 
1519    Level: intermediate
1520 
1521 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1522 @*/
1523 PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
1524 {
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1529   ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 /*@C
1534    PCGAMGGetType - Get solution method
1535 
1536    Collective on PC
1537 
1538    Input Parameter:
1539 .  pc - the preconditioner context
1540 
1541    Output Parameter:
1542 .  type - the type of algorithm used
1543 
1544    Level: intermediate
1545 
1546 .seealso: PCGAMGSetType(), PCGAMGType
1547 @*/
1548 PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
1549 {
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1554   ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1559 {
1560   PC_MG          *mg      = (PC_MG*)pc->data;
1561   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1562 
1563   PetscFunctionBegin;
1564   *type = pc_gamg->type;
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
1569 {
1570   PetscErrorCode ierr,(*r)(PC);
1571   PC_MG          *mg      = (PC_MG*)pc->data;
1572   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1573 
1574   PetscFunctionBegin;
1575   pc_gamg->type = type;
1576   ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
1577   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
1578   if (pc_gamg->ops->destroy) {
1579     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
1580     ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
1581     pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1582     /* cleaning up common data in pc_gamg - this should disappear someday */
1583     pc_gamg->data_cell_cols = 0;
1584     pc_gamg->data_cell_rows = 0;
1585     pc_gamg->orig_data_cell_cols = 0;
1586     pc_gamg->orig_data_cell_rows = 0;
1587     ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
1588     pc_gamg->data_sz = 0;
1589   }
1590   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
1591   ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
1592   ierr = (*r)(pc);CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
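
/*
   Sketch: select the classical AMG variant programmatically and read the type
   back; 'pc' is an existing PCGAMG context.

      PCGAMGType t;
      ierr = PCGAMGSetType(pc,PCGAMGCLASSICAL);CHKERRQ(ierr);
      ierr = PCGAMGGetType(pc,&t);CHKERRQ(ierr);

   Note that PCGAMGSetType_GAMG() destroys the previous subtype's data, so
   switching types discards any state built for the old type.
*/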
1595 
1596 /* -------------------------------------------------------------------------- */
1597 /*
1598    PCMGGetGridComplexity - compute coarse grid complexity of MG hierarchy
1599 
1600    Input Parameter:
1601 .  pc - the preconditioner context
1602 
1603    Output Parameter:
1604 .  gc - grid complexity = sum_i(nnz_i) / nnz_0
1605 
1606    Level: advanced
1607 */
1608 static PetscErrorCode PCMGGetGridComplexity(PC pc, PetscReal *gc)
1609 {
1610   PetscErrorCode ierr;
1611   PC_MG          *mg      = (PC_MG*)pc->data;
1612   PC_MG_Levels   **mglevels = mg->levels;
1613   PetscInt       lev;
1614   PetscLogDouble nnz0 = 0, sgc = 0;
1615   MatInfo        info;
1616 
1617   PetscFunctionBegin;
1618   if (!pc->setupcalled) {
1619     *gc = 0;
1620     PetscFunctionReturn(0);
1621   }
1622   if (!mg->nlevels) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MG has no levels");
1623   for (lev=0; lev<mg->nlevels; lev++) {
1624     Mat dB;
1625     ierr = KSPGetOperators(mglevels[lev]->smoothd,NULL,&dB);CHKERRQ(ierr);
1626     ierr = MatGetInfo(dB,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
1627     sgc += info.nz_used;
1628     if (lev==mg->nlevels-1) nnz0 = info.nz_used;
1629   }
1630   if (nnz0 > 0) *gc = (PetscReal)(sgc/nnz0);
1631   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of nonzeros on the finest level is not available");
1632   PetscFunctionReturn(0);
1633 }
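
/*
   Worked example of the gc formula above (made-up numbers): a three-level
   hierarchy with nnz counts 1.0e6 (finest), 1.2e5, and 1.5e4 gives

      gc = (1.0e6 + 1.2e5 + 1.5e4) / 1.0e6 = 1.135

   Values near 1 indicate cheap coarse grids relative to the fine operator.
*/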
1634 
1635 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1636 {
1637   PetscErrorCode ierr; PetscInt i; /* i indexes levels, so it is a PetscInt, not a PetscErrorCode */
1638   PC_MG          *mg      = (PC_MG*)pc->data;
1639   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1640   PetscReal       gc=0;
1641   PetscFunctionBegin;
1642   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1643   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1644   for (i=0;i<pc_gamg->current_level;i++) {
1645     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1646   }
1647   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1648   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1649   if (pc_gamg->use_aggs_in_asm) {
1650     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1651   }
1652   if (pc_gamg->use_parallel_coarse_grid_solver) {
1653     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1654   }
1655 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1656   if (pc_gamg->cpu_pin_coarse_grids) {
1657     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1658   }
1659 #endif
1660   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1661   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1662   /* } else { */
1663   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1664   /* } */
1665   if (pc_gamg->ops->view) {
1666     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1667   }
1668   ierr = PCMGGetGridComplexity(pc,&gc);CHKERRQ(ierr);
1669   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g\n",(double)gc);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1674 {
1675   PetscErrorCode ierr;
1676   PC_MG          *mg      = (PC_MG*)pc->data;
1677   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1678   PetscBool      flag,f2;
1679   MPI_Comm       comm;
1680   char           prefix[256],tname[32];
1681   PetscInt       i,n;
1682   const char     *pcpre;
1683   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1684   PetscFunctionBegin;
1685   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1686   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1687   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1688   if (flag) {
1689     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1690   }
1691   ierr = PetscOptionsFList("-pc_gamg_esteig_ksp_type","Krylov method for eigen estimator","PCGAMGSetEstEigKSPType",KSPList,pc_gamg->esteig_type,tname,sizeof(tname),&flag);CHKERRQ(ierr);
1692   if (flag) {
1693     ierr = PCGAMGSetEstEigKSPType(pc,tname);CHKERRQ(ierr);
1694   }
1695   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartition coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1696   f2 = PETSC_TRUE;
1697   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use the eigen estimate from smoothed aggregation for the smoother","PCGAMGSetUseSAEstEig",f2,&f2,&flag);CHKERRQ(ierr);
1698   if (flag) pc_gamg->use_sa_esteig = f2 ? 1 : 0;
1699   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1700   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1701   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1702   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1703   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1704   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1705   ierr = PetscOptionsInt("-pc_gamg_esteig_ksp_max_it","Number of iterations of eigen estimator","PCGAMGSetEstEigKSPMaxIt",pc_gamg->esteig_max_it,&pc_gamg->esteig_max_it,NULL);CHKERRQ(ierr);
1706   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1707   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1708   n = PETSC_MG_MAXLEVELS;
1709   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1710   if (!flag || n < PETSC_MG_MAXLEVELS) {
1711     if (!flag || n < 1) n = 1; /* guard: an empty user-provided list would read threshold[-1] below */
1712     i = n;
1713     do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_MG_MAXLEVELS);
1714   }
1715   n = PETSC_MG_MAXLEVELS;
1716   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (0 for first reduction puts one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1717   if (!flag) i = 0;
1718   else i = n;
1719   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* a do-while here would write one element past the array when all factors were supplied */
1720   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1721   {
1722     PetscReal eminmax[2] = {0., 0.};
1723     n = 2;
1724     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1725     if (flag) {
1726       if (n != 2) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1727       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr);
1728     }
1729   }
1730   /* set options for subtype */
1731   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1732 
1733   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1734   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1735   ierr = PetscOptionsTail();CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 /* -------------------------------------------------------------------------- */
1740 /*MC
1741      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1742 
1743    Options Database Keys:
1744 +   -pc_gamg_type <type> - one of agg, geo, or classical
1745 .   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
1746 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
1747 .   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1748 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1749                                         equations on each process that has degrees of freedom
1750 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1751 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1752 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1753 
1754    Options Database Keys for default Aggregation:
1755 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1756 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1757 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1758 
1759    Multigrid options:
1760 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1761 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
1762 .   -pc_mg_type <multiplicative> - (one of) additive multiplicative full kaskade
1763 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1764 
1765 
1766   Notes:
1767     In order to obtain good performance for PCGAMG for vector-valued problems you must
1768        call MatSetBlockSize() to indicate the number of degrees of freedom per grid point, and
1769        call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator.
1770        See the Users Manual Chapter 4 for more details.
1771 
1772   Level: intermediate
1773 
1774 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1775            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetEstEigKSPType()
1776 M*/
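
/*
   A minimal end-to-end sketch of the setup described in the Notes above; the
   3D elasticity setting (block size 3, coordinate-based near null space) and
   the variables A, b, x, nloc, and coords are assumptions for illustration.

      Mat A;          assembled operator, 3 dof per grid point
      KSP ksp;
      PC  pc;
      ierr = MatSetBlockSize(A,3);CHKERRQ(ierr);
      ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
      ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCGAMG);CHKERRQ(ierr);
      ierr = PCSetCoordinates(pc,3,nloc,coords);CHKERRQ(ierr);   builds the rigid-body near null space
      ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
      ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/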
1777 
1778 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1779 {
1780   PetscErrorCode ierr; PetscInt i; /* i is a loop index over levels, not an error code */
1781   PC_GAMG        *pc_gamg;
1782   PC_MG          *mg;
1783 
1784   PetscFunctionBegin;
1785    /* register AMG type */
1786   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1787 
1788   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1789   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1790   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1791 
1792   /* create a supporting struct and attach it to pc */
1793   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1794   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1795   mg           = (PC_MG*)pc->data;
1796   mg->innerctx = pc_gamg;
1797 
1798   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1799 
1800   pc_gamg->setup_count = 0;
1801   /* these should be in subctx but repartitioning needs simple arrays */
1802   pc_gamg->data_sz = 0;
1803   pc_gamg->data    = NULL;
1804 
1805   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1806   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1807   pc->ops->setup          = PCSetUp_GAMG;
1808   pc->ops->reset          = PCReset_GAMG;
1809   pc->ops->destroy        = PCDestroy_GAMG;
1810   mg->view                = PCView_GAMG;
1811 
1812   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1816   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1817   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPType_C",PCGAMGSetEstEigKSPType_GAMG);CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPMaxIt_C",PCGAMGSetEstEigKSPMaxIt_GAMG);CHKERRQ(ierr);
1819   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1820   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1822   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1823   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1825   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1826   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1828   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1829   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1831   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1832   pc_gamg->repart           = PETSC_FALSE;
1833   pc_gamg->reuse_prol       = PETSC_FALSE;
1834   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1835   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1836   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1837   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1838   pc_gamg->min_eq_proc      = 50;
1839   pc_gamg->coarse_eq_limit  = 50;
1840   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1841   pc_gamg->threshold_scale = 1.;
1842   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1843   pc_gamg->current_level    = 0; /* don't need to init really */
1844   ierr = PetscStrcpy(pc_gamg->esteig_type,NULL);CHKERRQ(ierr);
1845   pc_gamg->esteig_max_it    = 10;
1846   pc_gamg->use_sa_esteig    = -1;
1847   pc_gamg->emin             = 0;
1848   pc_gamg->emax             = 0;
1849 
1850   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1851 
1852   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1853   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 /*@C
1858  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1859     from PCInitializePackage().
1860 
1861  Level: developer
1862 
1863  .seealso: PetscInitialize()
1864 @*/
1865 PetscErrorCode PCGAMGInitializePackage(void)
1866 {
1867   PetscErrorCode ierr;
1868 
1869   PetscFunctionBegin;
1870   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1871   PCGAMGPackageInitialized = PETSC_TRUE;
1872   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1873   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1874   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1875   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1876 
1877   /* general events */
1878   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", PC_CLASSID, &PC_GAMGGraph_AGG);CHKERRQ(ierr); /* use PC_CLASSID, matching the other GAMG events */
1879   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1880   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1881   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1882   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1883   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1884   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1885 
1886 #if defined PETSC_GAMG_USE_LOG
1887   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1888   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1889   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1890   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1891   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1892   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1893   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1894   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1895   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1896   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1897   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1898   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1899   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1900   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1901   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1902   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1903   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1904 
1905   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1906   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1907   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1908   /* create timer stages */
1909 #if defined GAMG_STAGES
1910   {
1911     char     str[32];
1912     PetscInt lidx;
1913     sprintf(str,"MG Level %d (finest)",0);
1914     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1915     for (lidx=1; lidx<9; lidx++) {
1916       sprintf(str,"MG Level %d",(int)lidx);
1917       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1918     }
1919   }
1920 #endif
1921 #endif
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 /*@C
1926  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1927     called from PetscFinalize() automatically.
1928 
1929  Level: developer
1930 
1931  .seealso: PetscFinalize()
1932 @*/
1933 PetscErrorCode PCGAMGFinalizePackage(void)
1934 {
1935   PetscErrorCode ierr;
1936 
1937   PetscFunctionBegin;
1938   PCGAMGPackageInitialized = PETSC_FALSE;
1939   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 /*@C
1944  PCGAMGRegister - Register a PCGAMG implementation.
1945 
1946  Input Parameters:
1947  + type - string that will be used as the name of the GAMG type.
1948  - create - function for creating the gamg context.
1949 
1950   Level: advanced
1951 
1952  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1953 @*/
1954 PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
1955 {
1956   PetscErrorCode ierr;
1957 
1958   PetscFunctionBegin;
1959   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1960   ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
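
/*
   Registration sketch: "my_gamg" and PCCreateGAMG_MyAGG are hypothetical names
   for a user-defined GAMG flavor; the create function must fill in
   pc_gamg->ops as the built-in types do.

      extern PetscErrorCode PCCreateGAMG_MyAGG(PC);

      ierr = PCGAMGRegister("my_gamg",PCCreateGAMG_MyAGG);CHKERRQ(ierr);
      ierr = PCGAMGSetType(pc,"my_gamg");CHKERRQ(ierr);   or -pc_gamg_type my_gamg
*/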
1963 
1964