/*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
 */
#include <petsc/private/matimpl.h>
#include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
#include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/

#if defined(PETSC_HAVE_CUDA)
  #include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
  #include <hip/hip_runtime.h>
#endif

PetscLogEvent petsc_gamg_setup_events[NUM_SET];
PetscLogEvent petsc_gamg_setup_matmat_events[PETSC_MG_MAXLEVELS][3];
PetscLogEvent PC_GAMGGraph_AGG;
PetscLogEvent PC_GAMGGraph_GEO;
PetscLogEvent PC_GAMGCoarsen_AGG;
PetscLogEvent PC_GAMGCoarsen_GEO;
PetscLogEvent PC_GAMGProlongator_AGG;
PetscLogEvent PC_GAMGProlongator_GEO;
PetscLogEvent PC_GAMGOptProlongator_AGG;

/* #define GAMG_STAGES */
#if defined(GAMG_STAGES)
static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
#endif

static PetscFunctionList GAMGList = NULL;
static PetscBool PCGAMGPackageInitialized;

/* ----------------------------------------------------------------------------- */
PetscErrorCode PCReset_GAMG(PC pc)
{
  PetscErrorCode ierr;
  PetscInt       level;
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
  pc_gamg->data_sz = 0;
  ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
  for (level = 0; level < PETSC_MG_MAXLEVELS; level++) {
    mg->min_eigen_DinvA[level] = 0;
    mg->max_eigen_DinvA[level] = 0;
  }
  pc_gamg->emin = 0;
  pc_gamg->emax = 0;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCGAMGCreateLevel_GAMG: create the coarse operator with RAP; repartition and/or
     reduce the number of active processes.

   Input Parameters:
   . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
          'pc_gamg->data_sz' are changed via repartitioning/reduction.
   . Amat_fine - matrix on this fine (k) level
   . cr_bs - coarse block size
   In/Output Parameters:
   . a_P_inout - prolongation operator to the next level (k-->k-1)
   . a_nactive_proc - number of active procs
   Output Parameter:
   . a_Amat_crs - coarse matrix that is created (k-1)
*/
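/*
   The coarse operator is the Galerkin triple product A_{k-1} = P^T A_k P, formed
   below with MatPtAP(). A minimal standalone sketch of the same product (the
   names Afine, P, and Acoarse are illustrative, not from this file):

     Mat Acoarse;
     ierr = MatPtAP(Afine, P, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Acoarse);CHKERRQ(ierr);

   MAT_INITIAL_MATRIX allocates the product; MAT_REUSE_MATRIX can be passed on
   later calls when the nonzero structure is unchanged.
*/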

static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
{
  PetscErrorCode  ierr;
  PC_MG           *mg         = (PC_MG*)pc->data;
  PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
  Mat             Cmat,Pold=*a_P_inout;
  MPI_Comm        comm;
  PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
  PetscInt        ncrs_eq,ncrs,f_bs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
  ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
  ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);

  if (Pcolumnperm) *Pcolumnperm = NULL;

  /* set 'ncrs' (nodes), 'ncrs_eq' (equations) */
  ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
  if (pc_gamg->data_cell_rows>0) {
    ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
  } else {
    PetscInt  bs;
    ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
    ncrs = ncrs_eq/bs;
  }
  /* get number of PEs to make active 'new_size', reduce, can be any integer 1-P */
  if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* a factor of 0 turns on reducing to one process per device; should also do this for HIP, etc. */
#if defined(PETSC_HAVE_CUDA)
    PetscShmComm pshmcomm;
    PetscMPIInt  locrank;
    MPI_Comm     loccomm;
    PetscInt     s_nnodes,r_nnodes, new_new_size;
    cudaError_t  cerr;
    int          devCount;
    ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
    ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRMPI(ierr);
    s_nnodes = !locrank;
    ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (size%r_nnodes) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of nodes np=%D nnodes=%D",size,r_nnodes);
    devCount = 0;
    cerr = cudaGetDeviceCount(&devCount);
    cudaGetLastError(); /* Reset the last error */
    if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
      new_new_size = r_nnodes * devCount;
      new_size = new_new_size;
      ierr = PetscInfo5(pc,"Fine grid with Cuda. %D nodes. Change new active set size %d --> %d (devCount=%d #nodes=%D)\n",r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
    } else {
      ierr = PetscInfo(pc,"With Cuda but no device. Use heuristics.\n");CHKERRQ(ierr);
      goto HEURISTIC;
    }
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
#endif
  } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
    if (nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"number of active processes %D not divisible by reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
    new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
    ierr = PetscInfo3(pc,"Manually setting reduction to %d active processes (%d/%D)\n",new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
  } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
    new_size = 1;
    ierr = PetscInfo1(pc,"Force coarsest grid reduction to %d active processes\n",new_size);CHKERRQ(ierr);
  } else {
    PetscInt ncrs_eq_glob;
#if defined(PETSC_HAVE_CUDA)
    HEURISTIC:
#endif
    ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
    new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
    if (!new_size) new_size = 1; /* not likely, possible? */
    else if (new_size >= nactive) new_size = nactive; /* no change, rare */
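    /* For example (illustrative numbers, not from this run): 1,000,000 global
       equations with -pc_gamg_process_eq_limit 50000 gives new_size = 20. */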
    ierr = PetscInfo2(pc,"Coarse grid reduction from %d to %d active processes\n",nactive,new_size);CHKERRQ(ierr);
  }
  if (new_size==nactive) {
    *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
    if (new_size < size) {
      /* odd case where multiple coarse grids are on one processor or no coarsening ... */
      ierr = PetscInfo1(pc,"reduced grid using same number of processors (%d) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
      if (pc_gamg->cpu_pin_coarse_grids) {
        ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
        ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
      }
    }
    /* we know that the grid structure can be reused in MatPtAP */
  } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
    PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
    IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
    nloc_old = ncrs_eq/cr_bs;
    if (ncrs_eq % cr_bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
    /* get new_size and rfactor */
    if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
      /* find factor */
      if (new_size == 1) rfactor = size; /* don't modify */
      else {
        PetscReal best_fact = 0.;
        jj = -1;
        for (kk = 1; kk <= size; kk++) {
          if (!(size%kk)) { /* a candidate */
            PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
            if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
            if (fact > best_fact) {
              best_fact = fact; jj = kk;
            }
          }
        }
        if (jj != -1) rfactor = jj;
        else rfactor = 1; /* a prime */
        if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
        else expand_factor = rfactor;
      }
      new_size = size/rfactor; /* make the new size one that corresponds to an integer factor of 'size' */
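      /* Worked example (illustrative): with size=24 and a requested new_size=5, the
         divisors of 24 give candidate active-process counts 24,12,8,6,4,3,2,1; 6 is
         the closest to 5, so rfactor=4 and the actual new_size becomes 24/4=6. */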
      if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
        *a_Amat_crs = Cmat;
        ierr = PetscInfo2(pc,"Finding factorable processor set stopped reduction: new_size=%d, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
    }
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
    /* make 'is_eq_newproc' */
    ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
    if (pc_gamg->repart) {
      /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
      Mat      adj;
      ierr = PetscInfo4(pc,"Repartition: size (active): %d --> %d, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
      /* get 'adj' */
      if (cr_bs == 1) {
        ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
      } else {
        /* make a scalar matrix to partition (no Stokes here) */
        Mat               tMat;
        PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
        const PetscScalar *vals;
        const PetscInt    *idx;
        PetscInt          *d_nnz, *o_nnz, M, N;
        static PetscInt   llev = 0; /* ugly but just used for debugging */
        MatType           mtype;

        ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
        ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
        ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
        for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
          ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
          d_nnz[jj] = ncols/cr_bs;
          o_nnz[jj] = ncols/cr_bs;
          ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
          if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
          if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
        }

        ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
        ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
        ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
        ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
        ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
        ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

        for (ii = Istart_crs; ii < Iend_crs; ii++) {
          PetscInt dest_row = ii/cr_bs;
          ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
          for (jj = 0; jj < ncols; jj++) {
            PetscInt    dest_col = idx[jj]/cr_bs;
            PetscScalar v        = 1.0;
            ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
          }
          ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
        }
        ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

        if (llev++ == -1) {
          PetscViewer viewer; char fname[32];
          ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
          ierr = PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
          ierr = MatView(tMat, viewer);CHKERRQ(ierr);
          ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
        }
        ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
        ierr = MatDestroy(&tMat);CHKERRQ(ierr);
      } /* create 'adj' */

      { /* partition: get newproc_idx */
        char            prefix[256];
        const char      *pcpre;
        const PetscInt  *is_idx;
        MatPartitioning mpart;
        IS              proc_is;

        ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
        ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
        ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
        ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
        ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
        ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);

        /* collect IS info */
        ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
        ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
        for (kk = jj = 0 ; kk < nloc_old ; kk++) {
          for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
            newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
          }
        }
        ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
        ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
      }
      ierr = MatDestroy(&adj);CHKERRQ(ierr);

      ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
      ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
    } else { /* simple aggregation of parts -- 'is_eq_newproc' */
      PetscInt targetPE;
      if (new_size==nactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
      ierr = PetscInfo1(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
      targetPE = (rank/rfactor)*expand_factor;
      ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
    } /* end simple 'is_eq_newproc' */

    /*
      Create an index set from the 'is_eq_newproc' index set to indicate the mapping TO the new processes
    */
    ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
    is_eq_num_prim = is_eq_num;
    /*
      Determine how many equations/vertices are assigned to each processor
    */
    ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
    ncrs_eq_new = counts[rank];
    ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
    ncrs_new = ncrs_eq_new/cr_bs;

    ierr = PetscFree(counts);CHKERRQ(ierr);
    /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxiliary data into some complex abstracted thing */
    {
      Vec            src_crd, dest_crd;
      const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
      VecScatter     vecscat;
      PetscScalar    *array;
      IS             isscat;
      /* move data (for primal equations only) */
      /* Create a vector to contain the newly ordered element information */
      ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
      ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
      ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
      /*
        There are 'ndata_rows*ndata_cols' data items per node (one can think of the vectors as having
        a block size of ...).  Note, ISs are expanded into equation space by 'cr_bs'.
      */
      ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
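      /* tidx[] maps each local node to its data entries in the new numbering: node
         'id' owns entries id*node_data_sz .. id*node_data_sz + node_data_sz - 1 */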
      ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
      for (ii=0,jj=0; ii<ncrs; ii++) {
        PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
        for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
      }
      ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
      ierr = PetscFree(tidx);CHKERRQ(ierr);
      /*
        Create a vector to contain the original vertex information for each element
      */
      ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
      for (jj=0; jj<ndata_cols; jj++) {
        const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
        for (ii=0; ii<ncrs; ii++) {
          for (kk=0; kk<ndata_rows; kk++) {
            PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
            PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
            ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
          }
        }
      }
      ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
      /*
        Scatter the element vertex information (still in the original vertex ordering)
        to the correct processor
      */
      ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&isscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
      /*
        Put the element vertex data into a new allocation of the gdata->ele
      */
      ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
      ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);

      pc_gamg->data_sz = node_data_sz*ncrs_new;
      strideNew        = ncrs_new*ndata_rows;

      ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
      for (jj=0; jj<ndata_cols; jj++) {
        for (ii=0; ii<ncrs_new; ii++) {
          for (kk=0; kk<ndata_rows; kk++) {
            PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
            pc_gamg->data[ix] = PetscRealPart(array[jx]);
          }
        }
      }
      ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
      ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
    }
    /* move A and P (columns) with new layout */
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
    /*
      Invert for MatCreateSubMatrix
    */
    ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
    ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
    ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
    if (is_eq_num != is_eq_num_prim) {
      ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
    }
    if (Pcolumnperm) {
      ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
      *Pcolumnperm = new_eq_indices;
    }
    ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
    /* 'a_Amat_crs' output */
    {
      Mat mat;
      ierr        = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
      *a_Amat_crs = mat;
    }
    ierr = MatDestroy(&Cmat);CHKERRQ(ierr);

    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
    /* prolongator */
    {
      IS       findices;
      PetscInt Istart,Iend;
      Mat      Pnew;

      ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
      ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
      ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
      ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
      ierr = ISDestroy(&findices);CHKERRQ(ierr);
      ierr = MatSetOption(Pnew,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);

      ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
      ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);

      /* output - repartitioned */
      *a_P_inout = Pnew;
    }
    ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);

    *a_nactive_proc = new_size; /* output */

    /* pinning on reduced grids is not a bad heuristic, and the optimization gets folded into the process-reduction optimization */
    if (pc_gamg->cpu_pin_coarse_grids) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      static PetscInt llev = 2;
      ierr = PetscInfo1(pc,"Pinning level %D to the CPU\n",llev++);CHKERRQ(ierr);
#endif
      ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
      if (1) { /* HACK: move this to MatBindCPU_MPIAIJXXX; lvec is created and needs to be pinned, which is done in MatSetUpMultiply_MPIAIJ */
        Mat         A = *a_Amat_crs, P = *a_P_inout;
        PetscMPIInt size;
        ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
        if (size > 1) {
          Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
          ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
          ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

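/*
   PCGAMGSquareGraph_GAMG - compute the sparsity pattern of Gmat1*Gmat1 (or of
   Gmat1^T*Gmat1 when Gmat1 is not structurally symmetric). Only the symbolic
   product is formed; the result is used for aggregation, so the numerical
   values are never needed.
*/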
PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
{
  PetscErrorCode ierr;
  const char     *prefix;
  char           addp[32];
  PC_MG          *mg      = (PC_MG*)a_pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
  ierr = PetscInfo1(a_pc,"Square Graph on level %D\n",pc_gamg->current_level+1);CHKERRQ(ierr);
  ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
  ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",pc_gamg->current_level);CHKERRQ(ierr);
  ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
  if ((*Gmat2)->structurally_symmetric) {
    ierr = MatProductSetType(*Gmat2,MATPRODUCT_AB);CHKERRQ(ierr);
  } else {
    ierr = MatSetOption(Gmat1,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
  }
  ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
  ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
  ierr = MatProductClear(*Gmat2);CHKERRQ(ierr);
  /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
  (*Gmat2)->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
                    by setting data structures and options.

   Input Parameter:
.  pc - the preconditioner context

*/
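/*
   A user reaches this routine indirectly through PCSetUp(). A minimal sketch of
   a typical setup (the ksp and the assembled matrix A are assumed to exist):

     PC pc;
     ierr = KSPSetOperators(ksp, A, A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr);
     ierr = PCSetType(pc, PCGAMG);CHKERRQ(ierr);
     ierr = KSPSetUp(ksp);CHKERRQ(ierr);    <-- KSPSetUp() calls PCSetUp(), which dispatches here
*/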
PetscErrorCode PCSetUp_GAMG(PC pc)
{
  PetscErrorCode ierr;
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
  Mat            Pmat     = pc->pmat;
  PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
  MPI_Comm       comm;
  PetscMPIInt    rank,size,nactivepe;
  Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
  IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
  PetscLogDouble nnz0=0.,nnztot=0.;
  MatInfo        info;
  PetscBool      is_last = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  if (pc->setupcalled) {
    if (!pc_gamg->reuse_prol || pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* reset everything */
      ierr = PCReset_MG(pc);CHKERRQ(ierr);
      pc->setupcalled = 0;
    } else {
      PC_MG_Levels **mglevels = mg->levels;
      /* just do Galerkin grids */
      Mat          B,dA,dB;

      if (pc_gamg->Nlevels > 1) {
        PetscInt gl;
        /* currently only handle case where mat and pmat are the same on coarser levels */
        ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
        /* (re)set to get dirty flag */
        ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);

        for (level=pc_gamg->Nlevels-2,gl=0; level>=0; level--,gl++) {
          MatReuse reuse = MAT_INITIAL_MATRIX;

          /* the matrix structure can change from repartitioning or process reduction, but we don't know if we have process reduction here; should fix */
          ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
          if (B->product) {
            if (B->product->A == dB && B->product->B == mglevels[level+1]->interpolate) {
              reuse = MAT_REUSE_MATRIX;
            }
          }
          if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr); }
          if (reuse == MAT_REUSE_MATRIX) {
            ierr = PetscInfo1(pc,"RAP after first solve, reuse matrix level %D\n",level);CHKERRQ(ierr);
          } else {
            ierr = PetscInfo1(pc,"RAP after first solve, new matrix level %D\n",level);CHKERRQ(ierr);
          }
          ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
          ierr = MatPtAP(dB,mglevels[level+1]->interpolate,reuse,PETSC_DEFAULT,&B);CHKERRQ(ierr);
          ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
          mglevels[level]->A = B;
          ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
          dB   = B;
        }
      }

      ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (!pc_gamg->data) {
    if (pc_gamg->orig_data) {
      ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
      ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);

      pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
      pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
      pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;

      ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
      for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
    } else {
      if (!pc_gamg->ops->createdefaultdata) SETERRQ(comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
      ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
    }
  }

  /* cache original data for reuse */
  if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
    ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
    for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
    pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
    pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
  }

  /* get basic dims */
  ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
  ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);

  ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
  nnz0   = info.nz_used;
  nnztot = info.nz_used;
  ierr = PetscInfo6(pc,"level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);

  /* Get A_i and R_i */
  for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
    pc_gamg->current_level = level;
    if (level >= PETSC_MG_MAXLEVELS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
    level1 = level + 1;
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
#if defined(GAMG_STAGES)
    ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
#endif
    { /* construct prolongator */
      Mat              Gmat;
      PetscCoarsenData *agg_lists;
      Mat              Prol11;

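      /* three-step pipeline: build the strength-of-connection graph, coarsen it
         into aggregates, then build the prolongator from the aggregates */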
      ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
      ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
      ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);

      /* could have failed to create new level */
      if (Prol11) {
        const char *prefix;
        char       addp[32];

        /* get new block size of coarse matrices */
        ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);

        if (pc_gamg->ops->optprolongator) {
          /* smooth */
          ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
        }

        if (pc_gamg->use_aggs_in_asm) {
          PetscInt bs;
          ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
          ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
        }

        ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
        ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
        ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
        ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
        /* Always generate the transpose with CUDA
           Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
        ierr = MatSetOption(Prol11,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
        ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
        Parr[level1] = Prol11;
      } else Parr[level1] = NULL; /* failed to coarsen */

      ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
      ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
    } /* construct prolongator scope */
    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
    if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
    if (!Parr[level1]) { /* failed to coarsen */
      ierr = PetscInfo1(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
#if defined(GAMG_STAGES)
      ierr = PetscLogStagePop();CHKERRQ(ierr);
#endif
      break;
    }
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
    ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is the next M, the loop test variable */
    if (is_last) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_last should not be set yet");
    if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
    if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
    ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);

    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
    ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is the loop test variable */
    ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
    nnztot += info.nz_used;
    ierr = PetscInfo5(pc,"%D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);

#if defined(GAMG_STAGES)
    ierr = PetscLogStagePop();CHKERRQ(ierr);
#endif
    /* stop if one node or one proc -- could pull back for singular problems */
    if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
      ierr = PetscInfo2(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
      level++;
      break;
    }
  } /* levels */
  ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);

  ierr = PetscInfo2(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
  pc_gamg->Nlevels = level + 1;
  fine_level       = level;
  ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);

  if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
    PetscErrorCode (*savesetfromoptions[PETSC_MG_MAXLEVELS])(PetscOptionItems*,KSP);

    /* set default smoothers & set operators */
    for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
      KSP smoother;
      PC  subpc;

      ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
      ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);

      ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
      /* set ops */
      ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
      ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);

      /* set defaults */
      ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);

      /* set blocks for ASM smoother that uses the 'aggregates' */
      if (pc_gamg->use_aggs_in_asm) {
        PetscInt sz;
        IS       *iss;

        sz   = nASMBlocksArr[level];
        iss  = ASMLocalIDsArr[level];
        ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
        ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
        ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
        if (!sz) {
          IS       is;
          ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
          ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
          ierr = ISDestroy(&is);CHKERRQ(ierr);
        } else {
          PetscInt kk;
          ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
          for (kk=0; kk<sz; kk++) {
            ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
          }
          ierr = PetscFree(iss);CHKERRQ(ierr);
        }
        ASMLocalIDsArr[level] = NULL;
        nASMBlocksArr[level]  = 0;
      } else {
        ierr = PCSetType(subpc, PCSOR);CHKERRQ(ierr);
      }
    }
    {
      /* coarse grid */
      KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
      Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;

      ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
      ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
      if (!pc_gamg->use_parallel_coarse_grid_solver) {
        ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
        ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
        ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
        ierr = PCSetUp(subpc);CHKERRQ(ierr);
        ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
        if (ii != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
        ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
        ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
        ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
        ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
        ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
      }
    }

    /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
    ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
    ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
    ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);

    /* setup cheby eigen estimates from SA */
    if (pc_gamg->use_sa_esteig==1) {
      for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
        KSP       smoother;
        PetscBool ischeb;

        savesetfromoptions[level] = NULL;
        ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
        ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
        if (ischeb) {
          KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;

          ierr = KSPSetFromOptions(smoother);CHKERRQ(ierr); /* let command line emax override using SA's eigenvalues */
          if (mg->max_eigen_DinvA[level] > 0 && cheb->emax == 0.) {
            PC        subpc;
            PetscBool isjac;
            ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
            ierr = PetscObjectTypeCompare((PetscObject)subpc,PCJACOBI,&isjac);CHKERRQ(ierr);
            if (isjac && pc_gamg->use_sa_esteig==1) {
              PetscReal emax,emin;

              emin = mg->min_eigen_DinvA[level];
              emax = mg->max_eigen_DinvA[level];
              ierr = PetscInfo4(pc,"PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
              cheb->emin_computed = emin;
              cheb->emax_computed = emax;
              ierr = KSPChebyshevSetEigenvalues(smoother, cheb->tform[2]*emin + cheb->tform[3]*emax, cheb->tform[0]*emin + cheb->tform[1]*emax);CHKERRQ(ierr);

              /* We have set the eigenvalues and consumed the transformation values;
                 prevent PCSetUp_MG below from flagging the eigenvalues for recomputation
                 when setfromoptions is called again */
              savesetfromoptions[level] = smoother->ops->setfromoptions;
              smoother->ops->setfromoptions = NULL;
            }
          }
        }
      }
    }

    ierr = PCSetUp_MG(pc);CHKERRQ(ierr);

    /* restore Chebyshev smoother for next calls */
    if (pc_gamg->use_sa_esteig==1) {
      for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
        if (savesetfromoptions[level]) {
          KSP smoother;
          ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
          smoother->ops->setfromoptions = savesetfromoptions[level];
        }
      }
    }

    /* clean up */
    for (level=1; level<pc_gamg->Nlevels; level++) {
      ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
      ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
    }
  } else {
    KSP smoother;

    ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
    ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
    ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
    ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
    ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* ------------------------------------------------------------------------- */
/*
 PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
   that was created with PCCreate_GAMG().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
PetscErrorCode PCDestroy_GAMG(PC pc)
{
  PetscErrorCode ierr;
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
  if (pc_gamg->ops->destroy) {
    ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
  }
  ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
  ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
  ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.

   Logically Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - the number of equations

   Options Database Key:
.  -pc_gamg_process_eq_limit <limit>

   Notes:
    GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
          that has degrees of freedom.
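
   Example (a typical call; 'pc' is assumed to be a PC of type PCGAMG):
.vb
   ierr = PCGAMGSetProcEqLim(pc, 1000);CHKERRQ(ierr);   /* aim for ~1000 equations per process on coarse grids */
.ve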

   Level: intermediate

.seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
@*/
PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  if (n>0) pc_gamg->min_eq_proc = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - maximum number of equations to aim for

   Options Database Key:
.  -pc_gamg_coarse_eq_limit <limit>

   Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
     has fewer than 1000 unknowns.

   Level: intermediate

.seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
@*/
PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  if (n>0) pc_gamg->coarse_eq_limit = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - PETSC_TRUE or PETSC_FALSE

   Options Database Key:
.  -pc_gamg_repartition <true,false>

   Notes:
    this will generally improve the load balancing of the work on each level

   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->repart = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetEstEigKSPMaxIt - Set number of KSP iterations in eigen estimator (for Cheby)

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - number of iterations

   Options Database Key:
.  -pc_gamg_esteig_ksp_max_it <its>

   Notes:
    More iterations generally give a more accurate estimate of the extreme eigenvalues used to set up the Chebyshev smoother.

   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGSetEstEigKSPMaxIt(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPMaxIt_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetEstEigKSPMaxIt_GAMG(PC pc, PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->esteig_max_it = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetUseSAEstEig - Use eigen estimate from smoothed aggregation for Cheby smoother

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - PETSC_TRUE to use the smoothed aggregation eigen estimate

   Options Database Key:
.  -pc_gamg_use_sa_esteig <true,false>

   Notes:
    The eigenvalue estimates computed while constructing the smoothed aggregation prolongator are reused to set the target interval of the Chebyshev smoothers, avoiding a separate eigen estimation solve.

   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->use_sa_esteig = n ? 1 : 0;
  PetscFunctionReturn(0);
}

/*@C
   PCGAMGSetEstEigKSPType - Set type of KSP in eigen estimator (for Cheby)

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  t - KSP type

   Options Database Key:
.  -pc_gamg_esteig_ksp_type <type>

   Notes:
    For example, KSPCG is a common choice for symmetric positive definite operators.

   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGSetEstEigKSPType(PC pc, char t[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPType_C",(PC,char[]),(pc,t));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetEstEigKSPType_GAMG(PC pc, char t[])
{
  PetscErrorCode ierr;
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  ierr = PetscStrcpy(pc_gamg->esteig_type,t);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetEigenvalues - Set the min and max eigenvalue estimates used to set up the Chebyshev smoothers

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
.  emax - max eigenvalue
-  emin - min eigenvalue

   Options Database Key:
.  -pc_gamg_eigenvalues
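
   Example (illustrative values; emax and emin must have the same sign and emax > emin):
.vb
   ierr = PCGAMGSetEigenvalues(pc, 2.0, 0.1);CHKERRQ(ierr);
.ve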

   Level: intermediate

.seealso: PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPType()
@*/
PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
{
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  if (emax <= emin) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
  if (emax*emin <= 0.0) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
  pc_gamg->emax = emax;
  pc_gamg->emin = emin;

  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - PETSC_TRUE or PETSC_FALSE

   Options Database Key:
.  -pc_gamg_reuse_interpolation <true,false>

   Level: intermediate

   Notes:
    this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
          rebuilding the preconditioner more quickly.

.seealso: ()
@*/
PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->reuse_prol = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not

   Options Database Key:
.  -pc_gamg_asm_use_agg

   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->use_aggs_in_asm = flg;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetUseParallelCoarseGridSolve - Allow a parallel coarse grid solver

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  flg - PETSC_TRUE to not force coarse grid onto one processor

   Options Database Key:
.  -pc_gamg_use_parallel_coarse_grid_solver

   Level: intermediate

.seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
@*/
PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->use_parallel_coarse_grid_solver = flg;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetCpuPinCoarseGrids - Pin reduced grids to the CPU

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  flg - PETSC_TRUE to pin coarse grids to the CPU

   Options Database Key:
.  -pc_gamg_cpu_pin_coarse_grids

   Level: intermediate

.seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
@*/
PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->cpu_pin_coarse_grids = flg;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetCoarseGridLayoutType - Place reduced grids on processes in natural order (compact type)

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  flg - Layout type

   Options Database Key:
.  -pc_gamg_coarse_grid_layout_type

   Level: intermediate

.seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
@*/
PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->layout_type = flg;
  PetscFunctionReturn(0);
}

1320 /*@
1321    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1322 
1323    Not collective on PC
1324 
1325    Input Parameters:
1326 +  pc - the preconditioner
1327 -  n - the maximum number of levels to use
1328 
1329    Options Database Key:
1330 .  -pc_mg_levels
1331 
1332    Level: intermediate
1333 
1334 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim(), PCGAMGSetThreshold()
1335 @*/
1336 PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
1337 {
1338   PetscErrorCode ierr;
1339 
1340   PetscFunctionBegin;
1341   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1342   ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1347 {
1348   PC_MG   *mg      = (PC_MG*)pc->data;
1349   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1350 
1351   PetscFunctionBegin;
1352   pc_gamg->Nlevels = n;
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 /*@
1357    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1358 
1359    Not collective on PC
1360 
1361    Input Parameters:
1362 +  pc - the preconditioner context
1363 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1364 -  n - number of threshold values provided in array
1365 
1366    Options Database Key:
1367 .  -pc_gamg_threshold <threshold>
1368 
1369    Notes:
1370     Increasing the threshold decreases the rate of coarsening. Conversely, reducing the threshold increases the rate of coarsening (aggressive coarsening), which reduces the complexity of the coarse grids but generally results in slower solver convergence rates. Reducing coarse grid complexity also considerably reduces the cost of the Galerkin coarse grid construction.
1371     Before coarsening or aggregating the graph, GAMG removes values below this threshold from the graph, thereby reducing the coupling in the graph and producing a different (perhaps better) coarse set of points.
1372 
1373     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1374     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1375     If n is greater than the total number of levels, the excess entries in threshold will not be used.
1376 
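   For example, the following (a sketch; pc is an assumed PCGAMG preconditioner) uses a drop threshold of 0.01 on the finest level and 0.005 on the next, with the remaining levels filled in via the threshold scale:
.vb
   PetscReal th[2] = {0.01, 0.005};
   ierr = PCGAMGSetThreshold(pc,th,2);CHKERRQ(ierr);
.ve
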
1377    Level: intermediate
1378 
1379 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1380 @*/
1381 PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
1382 {
1383   PetscErrorCode ierr;
1384 
1385   PetscFunctionBegin;
1386   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1387   if (n) PetscValidRealPointer(v,2);
1388   ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1393 {
1394   PC_MG   *mg      = (PC_MG*)pc->data;
1395   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1396   PetscInt i;
1397   PetscFunctionBegin;
1398   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1399   for (i=PetscMax(i,1); i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale; /* i >= 1 guards against reading threshold[-1] when n is 0 */
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 /*@
1404    PCGAMGSetRankReductionFactors - Set a manual schedule for the process reduction on coarse grids
1405 
1406    Collective on PC
1407 
1408    Input Parameters:
1409 +  pc - the preconditioner context
1410 .  v - array of reduction factors; a 0 for the first value forces a reduction to one process/device on the first level when running on CUDA
1411 -  n - number of values provided in array
1412 
1413    Options Database Key:
1414 .  -pc_gamg_rank_reduction_factors <factors>
1415 
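   For example, the following (a sketch; pc is an assumed PCGAMG preconditioner) requests a reduction factor of 4 at the first coarsening and 8 at the second, leaving later levels to the internal heuristics:
.vb
   PetscInt factors[2] = {4, 8};
   ierr = PCGAMGSetRankReductionFactors(pc,factors,2);CHKERRQ(ierr);
.ve
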
1416    Level: intermediate
1417 
1418 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1419 @*/
1420 PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
1421 {
1422   PetscErrorCode ierr;
1423 
1424   PetscFunctionBegin;
1425   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1426   if (n) PetscValidIntPointer(v,2);
1427   ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
1432 {
1433   PC_MG   *mg      = (PC_MG*)pc->data;
1434   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1435   PetscInt i;
1436   PetscFunctionBegin;
1437   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
1438   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* -1 marks the level as unset, so the internal heuristics are used */
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 /*@
1443    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1444 
1445    Not collective on PC
1446 
1447    Input Parameters:
1448 +  pc - the preconditioner context
1449 -  v - the threshold scaling factor applied at each coarser level, usually < 1.0
1450 
1451    Options Database Key:
1452 .  -pc_gamg_threshold_scale <v>
1453 
1454    Notes:
1455    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1456    This scaling is applied at each subsequent coarsening; note that PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1457 
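   For example, the following (a sketch; pc is an assumed PCGAMG preconditioner) yields thresholds of 0.01, 0.005, 0.0025, ... from the finest level down:
.vb
   PetscReal th = 0.01;
   ierr = PCGAMGSetThresholdScale(pc,0.5);CHKERRQ(ierr); /* must be called before PCGAMGSetThreshold() */
   ierr = PCGAMGSetThreshold(pc,&th,1);CHKERRQ(ierr);
.ve
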
1458    Level: advanced
1459 
1460 .seealso: PCGAMGSetThreshold()
1461 @*/
1462 PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
1463 {
1464   PetscErrorCode ierr;
1465 
1466   PetscFunctionBegin;
1467   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1468   ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1473 {
1474   PC_MG   *mg      = (PC_MG*)pc->data;
1475   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1476   PetscFunctionBegin;
1477   pc_gamg->threshold_scale = v;
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 /*@C
1482    PCGAMGSetType - Set solution method
1483 
1484    Collective on PC
1485 
1486    Input Parameters:
1487 +  pc - the preconditioner context
1488 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1489 
1490    Options Database Key:
1491 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1492 
1493    Level: intermediate
1494 
1495 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1496 @*/
1497 PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
1498 {
1499   PetscErrorCode ierr;
1500 
1501   PetscFunctionBegin;
1502   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1503   ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 /*@C
1508    PCGAMGGetType - Get solution method
1509 
1510    Collective on PC
1511 
1512    Input Parameter:
1513 .  pc - the preconditioner context
1514 
1515    Output Parameter:
1516 .  type - the type of algorithm used
1517 
1518    Level: intermediate
1519 
1520 .seealso: PCGAMGSetType(), PCGAMGType
1521 @*/
1522 PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
1523 {
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1528   ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1533 {
1534   PC_MG          *mg      = (PC_MG*)pc->data;
1535   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1536 
1537   PetscFunctionBegin;
1538   *type = pc_gamg->type;
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
1543 {
1544   PetscErrorCode ierr,(*r)(PC);
1545   PC_MG          *mg      = (PC_MG*)pc->data;
1546   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1547 
1548   PetscFunctionBegin;
1549   pc_gamg->type = type;
1550   ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
1551   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
1552   if (pc_gamg->ops->destroy) {
1553     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
1554     ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
1555     pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1556     /* cleaning up common data in pc_gamg - this should disappear someday */
1557     pc_gamg->data_cell_cols = 0;
1558     pc_gamg->data_cell_rows = 0;
1559     pc_gamg->orig_data_cell_cols = 0;
1560     pc_gamg->orig_data_cell_rows = 0;
1561     ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
1562     pc_gamg->data_sz = 0;
1563   }
1564   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
1565   ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
1566   ierr = (*r)(pc);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 /* -------------------------------------------------------------------------- */
1571 /*
1572    PCMGGetGridComplexity - compute coarse grid complexity of MG hierarchy
1573 
1574    Input Parameter:
1575 .  pc - the preconditioner context
1576 
1577    Output Parameter:
1578 .  gc - grid complexity = sum_i(nnz_i) / nnz_0
1579 
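   For example, a three-level hierarchy with nnz counts {10000, 2000, 400} from fine to coarse gives gc = (10000+2000+400)/10000 = 1.24.
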
1580    Level: advanced
1581 */
1582 static PetscErrorCode PCMGGetGridComplexity(PC pc, PetscReal *gc)
1583 {
1584   PetscErrorCode ierr;
1585   PC_MG          *mg      = (PC_MG*)pc->data;
1586   PC_MG_Levels   **mglevels = mg->levels;
1587   PetscInt       lev;
1588   PetscLogDouble nnz0 = 0, sgc = 0;
1589   MatInfo        info;
1590 
1591   PetscFunctionBegin;
1592   if (!pc->setupcalled) {
1593     *gc = 0;
1594     PetscFunctionReturn(0);
1595   }
1596   if (!mg->nlevels) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MG has no levels");
1597   for (lev=0; lev<mg->nlevels; lev++) {
1598     Mat dB;
1599     ierr = KSPGetOperators(mglevels[lev]->smoothd,NULL,&dB);CHKERRQ(ierr);
1600     ierr = MatGetInfo(dB,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
1601     sgc += info.nz_used;
1602     if (lev==mg->nlevels-1) nnz0 = info.nz_used;
1603   }
1604   if (nnz0 > 0) *gc = (PetscReal)(sgc/nnz0);
1605   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of nonzeros on the finest level is not available");
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1610 {
1611   PetscErrorCode ierr,i;
1612   PC_MG          *mg      = (PC_MG*)pc->data;
1613   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1614   PetscReal       gc=0;
1615   PetscFunctionBegin;
1616   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1617   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1618   for (i=0;i<mg->nlevels; i++) {
1619     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1620   }
1621   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1622   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1623   if (pc_gamg->use_aggs_in_asm) {
1624     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1625   }
1626   if (pc_gamg->use_parallel_coarse_grid_solver) {
1627     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1628   }
1629 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1630   if (pc_gamg->cpu_pin_coarse_grids) {
1631     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1632   }
1633 #endif
1634   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1635   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1636   /* } else { */
1637   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1638   /* } */
1639   if (pc_gamg->ops->view) {
1640     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1641   }
1642   ierr = PCMGGetGridComplexity(pc,&gc);CHKERRQ(ierr);
1643   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g\n",(double)gc);CHKERRQ(ierr);
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1648 {
1649   PetscErrorCode ierr;
1650   PC_MG          *mg      = (PC_MG*)pc->data;
1651   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1652   PetscBool      flag,f2;
1653   MPI_Comm       comm;
1654   char           prefix[256],tname[32];
1655   PetscInt       i,n;
1656   const char     *pcpre;
1657   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1658   PetscFunctionBegin;
1659   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1660   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1661   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1662   if (flag) {
1663     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1664   }
1665   ierr = PetscOptionsFList("-pc_gamg_esteig_ksp_type","Krylov method for eigen estimator","PCGAMGSetEstEigKSPType",KSPList,pc_gamg->esteig_type,tname,sizeof(tname),&flag);CHKERRQ(ierr);
1666   if (flag) {
1667     ierr = PCGAMGSetEstEigKSPType(pc,tname);CHKERRQ(ierr);
1668   }
1669   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1670   f2 = PETSC_TRUE;
1671   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use eigen estimate from Smoothed aggregation for smoother","PCGAMGSetUseSAEstEig",f2,&f2,&flag);CHKERRQ(ierr);
1672   if (flag) pc_gamg->use_sa_esteig = f2 ? 1 : 0;
1673   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1674   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1675   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1676   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1677   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1678   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1679   ierr = PetscOptionsInt("-pc_gamg_esteig_ksp_max_it","Number of iterations of eigen estimator","PCGAMGSetEstEigKSPMaxIt",pc_gamg->esteig_max_it,&pc_gamg->esteig_max_it,NULL);CHKERRQ(ierr);
1680   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1681   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1682   n = PETSC_MG_MAXLEVELS;
1683   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1684   if (!flag || n < PETSC_MG_MAXLEVELS) {
1685     if (!flag) n = 1;
1686     i = n;
1687     do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_MG_MAXLEVELS); /* fill unspecified levels by scaling the previous level's threshold */
1688   }
1689   n = PETSC_MG_MAXLEVELS;
1690   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (0 for first reduction puts one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1691   if (!flag) i = 0;
1692   else i = n;
1693   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* -1 means use the internal heuristics for this level */
1694   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1695   {
1696     PetscReal eminmax[2] = {0., 0.};
1697     n = 2;
1698     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1699     if (flag) {
1700       if (n != 2) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1701       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr); /* arguments are (emax, emin) */
1702     }
1703   }
1704   /* set options for subtype */
1705   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1706 
1707   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1708   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1709   ierr = PetscOptionsTail();CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 /* -------------------------------------------------------------------------- */
1714 /*MC
1715      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1716 
1717    Options Database Keys:
1718 +   -pc_gamg_type <type> - one of agg, geo, or classical
1719 .   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
1720 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
1721 .   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1722 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1723                                         equations on each process that has degrees of freedom
1724 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1725 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1726 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1727 
1728    Options Database Keys for default Aggregation:
1729 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smoothed aggregation
1730 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1731 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1732 
1733    Multigrid options:
1734 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1735 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
1736 .   -pc_mg_type <multiplicative> - (one of) additive multiplicative full kaskade
1737 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1738 
1739 
1740   Notes:
1741     In order to obtain good performance for PCGAMG for vector valued problems you must
1742        Call MatSetBlockSize() to indicate the number of degrees of freedom per grid point
1743        Call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator
1744        See the Users Manual Chapter 4 for more details
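
   A minimal setup for a 3D elasticity problem might look as follows (a sketch; A, ksp, nloc, and coords are assumed to exist in the application):
.vb
   ierr = MatSetBlockSize(A,3);CHKERRQ(ierr);               /* three displacement dofs per grid point */
   ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
   ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
   ierr = PCSetType(pc,PCGAMG);CHKERRQ(ierr);
   ierr = PCSetCoordinates(pc,3,nloc,coords);CHKERRQ(ierr); /* GAMG constructs the rigid body modes as the near null space */
.ve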
1745 
1746   Level: intermediate
1747 
1748 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1749            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetEstEigKSPType()
1750 M*/
1751 
1752 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1753 {
1754   PetscErrorCode ierr,i;
1755   PC_GAMG        *pc_gamg;
1756   PC_MG          *mg;
1757 
1758   PetscFunctionBegin;
1759   /* register AMG type */
1760   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1761 
1762   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1763   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1764   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1765 
1766   /* create a supporting struct and attach it to pc */
1767   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1768   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1769   mg           = (PC_MG*)pc->data;
1770   mg->innerctx = pc_gamg;
1771 
1772   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1773 
1774   /* these should be in subctx but repartitioning needs simple arrays */
1775   pc_gamg->data_sz = 0;
1776   pc_gamg->data    = NULL;
1777 
1778   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1779   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1780   pc->ops->setup          = PCSetUp_GAMG;
1781   pc->ops->reset          = PCReset_GAMG;
1782   pc->ops->destroy        = PCDestroy_GAMG;
1783   mg->view                = PCView_GAMG;
1784 
1785   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1786   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1787   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1788   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1789   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1790   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPType_C",PCGAMGSetEstEigKSPType_GAMG);CHKERRQ(ierr);
1791   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPMaxIt_C",PCGAMGSetEstEigKSPMaxIt_GAMG);CHKERRQ(ierr);
1792   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1793   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1794   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1795   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1796   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1797   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1798   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1799   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1801   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1803   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1804   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1805   pc_gamg->repart           = PETSC_FALSE;
1806   pc_gamg->reuse_prol       = PETSC_FALSE;
1807   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1808   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1809   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1810   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1811   pc_gamg->min_eq_proc      = 50;
1812   pc_gamg->coarse_eq_limit  = 50;
1813   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1814   pc_gamg->threshold_scale = 1.;
1815   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1816   pc_gamg->current_level    = 0; /* don't need to init really */
1817   ierr = PetscStrcpy(pc_gamg->esteig_type,NULL);CHKERRQ(ierr);
1818   pc_gamg->esteig_max_it    = 10;
1819   pc_gamg->use_sa_esteig    = -1;
1820   pc_gamg->emin             = 0;
1821   pc_gamg->emax             = 0;
1822 
1823   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1824 
1825   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1826   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 /*@C
1831  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1832     from PCInitializePackage().
1833 
1834  Level: developer
1835 
1836  .seealso: PetscInitialize()
1837 @*/
1838 PetscErrorCode PCGAMGInitializePackage(void)
1839 {
1840   PetscErrorCode ierr;
1841   PetscInt       l;
1842 
1843   PetscFunctionBegin;
1844   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1845   PCGAMGPackageInitialized = PETSC_TRUE;
1846   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1847   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1848   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1849   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1850 
1851   /* general events */
1852   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", 0, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1853   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1854   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1855   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1856   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1857   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1858   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1859 
1860   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1861   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1862   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1863   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1864   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1865   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1866   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1867   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1868   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1869   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1870   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1871   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1872   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1873   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1874   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1875   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1876   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1877   for (l=0;l<PETSC_MG_MAXLEVELS;l++) {
1878     char ename[32];
1879 
1880     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Squ l%02d",l);CHKERRQ(ierr);
1881     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][0]);CHKERRQ(ierr);
1882     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Gal l%02d",l);CHKERRQ(ierr);
1883     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][1]);CHKERRQ(ierr);
1884     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Opt l%02d",l);CHKERRQ(ierr);
1885     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][2]);CHKERRQ(ierr);
1886   }
1887   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1888   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1889   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1890   /* create timer stages */
1891 #if defined(GAMG_STAGES)
1892   {
1893     char     str[32];
1894     PetscInt lidx;
1895     sprintf(str,"MG Level %d (finest)",0);
1896     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1897     for (lidx=1; lidx<PETSC_MG_MAXLEVELS; lidx++) {
1898       sprintf(str,"MG Level %d",(int)lidx);
1899       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1900     }
1901   }
1902 #endif
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 /*@C
1907  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1908     called from PetscFinalize() automatically.
1909 
1910  Level: developer
1911 
1912  .seealso: PetscFinalize()
1913 @*/
1914 PetscErrorCode PCGAMGFinalizePackage(void)
1915 {
1916   PetscErrorCode ierr;
1917 
1918   PetscFunctionBegin;
1919   PCGAMGPackageInitialized = PETSC_FALSE;
1920   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 /*@C
1925  PCGAMGRegister - Register a PCGAMG implementation.
1926 
1927  Input Parameters:
1928  + type - string that will be used as the name of the GAMG type.
1929  - create - function for creating the gamg context.
1930 
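 Sample usage (a sketch; MyGAMGCreate is a user-supplied creation routine, not part of PETSc):
.vb
   ierr = PCGAMGRegister("my_gamg",MyGAMGCreate);CHKERRQ(ierr);
   ...
   ierr = PCGAMGSetType(pc,"my_gamg");CHKERRQ(ierr);
.ve
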
1931   Level: advanced
1932 
1933  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1934 @*/
1935 PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
1936 {
1937   PetscErrorCode ierr;
1938 
1939   PetscFunctionBegin;
1940   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1941   ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945