xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision 66c772fa27804d55f3e116d73aed8ddbbdf815d3)
/*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
 */
#include <petsc/private/matimpl.h>
#include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
#include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/

#if defined(PETSC_HAVE_CUDA)
  #include <cuda_runtime.h>
#endif

#if defined(PETSC_HAVE_HIP)
  #include <hip/hip_runtime.h>
#endif

PetscLogEvent petsc_gamg_setup_events[NUM_SET];
PetscLogEvent petsc_gamg_setup_matmat_events[PETSC_MG_MAXLEVELS][3];
PetscLogEvent PC_GAMGGraph_AGG;
PetscLogEvent PC_GAMGGraph_GEO;
PetscLogEvent PC_GAMGCoarsen_AGG;
PetscLogEvent PC_GAMGCoarsen_GEO;
PetscLogEvent PC_GAMGProlongator_AGG;
PetscLogEvent PC_GAMGProlongator_GEO;
PetscLogEvent PC_GAMGOptProlongator_AGG;

/* #define GAMG_STAGES */
#if defined(GAMG_STAGES)
static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
#endif

static PetscFunctionList GAMGList = NULL;
static PetscBool PCGAMGPackageInitialized;

/* ----------------------------------------------------------------------------- */
PetscErrorCode PCReset_GAMG(PC pc)
{
  PetscErrorCode ierr;
  PetscInt       level;
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
  pc_gamg->data_sz = 0;
  ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
  for (level = 0; level < PETSC_MG_MAXLEVELS; level++) {
    mg->min_eigen_DinvA[level] = 0;
    mg->max_eigen_DinvA[level] = 0;
  }
  pc_gamg->emin = 0;
  pc_gamg->emax = 0;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCGAMGCreateLevel_GAMG: create the coarse operator with an RAP (PtAP) product, then
     repartition it and/or reduce the number of active processes.

   Input Parameters:
   . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
          'pc_gamg->data_sz' are changed via repartitioning/reduction.
   . Amat_fine - matrix on this fine (k) level
   . cr_bs - coarse block size
   In/Output Parameters:
   . a_P_inout - prolongation operator to the next level (k-->k-1)
   . a_nactive_proc - number of active procs
   Output Parameter:
   . a_Amat_crs - coarse matrix that is created (k-1)
*/
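
/*
   Usage sketch (for orientation, not new code): this routine is installed in the
   GAMG ops table and invoked from PCSetUp_GAMG() below, roughly as

     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1],
                                      &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);

   where Aarr/Parr/nactivepe are the per-level arrays used in PCSetUp_GAMG().
*/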

static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
{
  PetscErrorCode  ierr;
  PC_MG           *mg         = (PC_MG*)pc->data;
  PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
  Mat             Cmat,Pold=*a_P_inout;
  MPI_Comm        comm;
  PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
  PetscInt        ncrs_eq,ncrs,f_bs;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
  ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
  ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);

  if (Pcolumnperm) *Pcolumnperm = NULL;

  /* set 'ncrs' (nodes), 'ncrs_eq' (equations) */
  ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
  if (pc_gamg->data_cell_rows>0) {
    ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
  } else {
    PetscInt  bs;
    ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
    ncrs = ncrs_eq/bs;
  }
  /* get number of processes to make active, 'new_size'; reduction can be to any integer 1..P */
  if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* 0 turns on reducing to 1 process/device; do for HIP, etc. */
#if defined(PETSC_HAVE_CUDA)
    PetscShmComm pshmcomm;
    PetscMPIInt  locrank;
    MPI_Comm     loccomm;
    PetscInt     s_nnodes,r_nnodes, new_new_size;
    cudaError_t  cerr;
    int          devCount;
    ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
    ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRMPI(ierr);
    s_nnodes = !locrank;
    ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (size%r_nnodes) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of nodes np=%D nnodes%D",size,r_nnodes);
    devCount = 0;
    cerr = cudaGetDeviceCount(&devCount);
    cudaGetLastError(); /* Reset the last error */
    if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
      new_new_size = r_nnodes * devCount;
      new_size = new_new_size;
      ierr = PetscInfo5(pc,"Fine grid with Cuda. %D nodes. Change new active set size %d --> %d (devCount=%d #nodes=%D)\n",r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
    } else {
      ierr = PetscInfo(pc,"With Cuda but no device. Use heuristics.\n");CHKERRQ(ierr);
      goto HEURISTIC;
    }
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
#endif
  } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
    if (nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of active process %D wrt reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
    new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
    ierr = PetscInfo3(pc,"Manually setting reduction to %d active processes (%d/%D)\n",new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
  } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
    new_size = 1;
    ierr = PetscInfo1(pc,"Force coarsest grid reduction to %d active processes\n",new_size);CHKERRQ(ierr);
  } else {
    PetscInt ncrs_eq_glob;
#if defined(PETSC_HAVE_CUDA)
    HEURISTIC:
#endif
    ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
    new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
    if (!new_size) new_size = 1; /* not likely, possible? */
    else if (new_size >= nactive) new_size = nactive; /* no change, rare */
    ierr = PetscInfo2(pc,"Coarse grid reduction from %d to %d active processes\n",nactive,new_size);CHKERRQ(ierr);
  }
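  /*
     Worked example of the default heuristic above (illustrative numbers): with
     ncrs_eq_glob = 10000 and pc_gamg->min_eq_proc = 50, new_size = (PetscMPIInt)
     (10000./50. + 0.5) = 200, which is then clamped to 'nactive' if 200 would
     exceed the current active set.
  */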
  if (new_size==nactive) {
    *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
    if (new_size < size) {
      /* odd case where multiple coarse grids are on one processor or no coarsening ... */
      ierr = PetscInfo1(pc,"reduced grid using same number of processors (%d) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
      if (pc_gamg->cpu_pin_coarse_grids) {
        ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
        ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
      }
    }
    /* we know that the grid structure can be reused in MatPtAP */
  } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
    PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
    IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
    nloc_old = ncrs_eq/cr_bs;
    if (ncrs_eq % cr_bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
    /* get new_size and rfactor */
    if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
      /* find factor */
      if (new_size == 1) rfactor = size; /* don't modify */
      else {
        PetscReal best_fact = 0.;
        jj = -1;
        for (kk = 1 ; kk <= size ; kk++) {
          if (!(size%kk)) { /* a candidate */
            PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
            if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
            if (fact > best_fact) {
              best_fact = fact; jj = kk;
            }
          }
        }
        if (jj != -1) rfactor = jj;
        else rfactor = 1; /* a prime */
        if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
        else expand_factor = rfactor;
      }
      new_size = size/rfactor; /* make the new size one that divides the communicator size */
      if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
        *a_Amat_crs = Cmat;
        ierr = PetscInfo2(pc,"Finding factorable processor set stopped reduction: new_size=%d, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
    }
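    /*
       Worked example of the factor search above (illustrative): with size = 16 and
       a requested new_size = 5, the candidate divisors kk of 16 give nactpe in
       {16,8,4,2,1}; the normalized ratio 'fact' is largest (0.8) for nactpe = 4,
       so rfactor = 4 and new_size becomes 16/4 = 4.
    */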
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
    /* make 'is_eq_newproc' */
    ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
    if (pc_gamg->repart) {
      /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
      Mat      adj;
      ierr = PetscInfo4(pc,"Repartition: size (active): %d --> %d, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
      /* get 'adj' */
      if (cr_bs == 1) {
        ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
      } else {
        /* make a scalar matrix to partition (no Stokes here) */
        Mat               tMat;
        PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
        const PetscScalar *vals;
        const PetscInt    *idx;
        PetscInt          *d_nnz, *o_nnz, M, N;
        static PetscInt   llev = 0; /* ugly but just used for debugging */
        MatType           mtype;

        ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
        ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
        ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
        for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
          ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
          d_nnz[jj] = ncols/cr_bs;
          o_nnz[jj] = ncols/cr_bs;
          ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
          if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
          if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
        }

        ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
        ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
        ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
        ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
        ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
        ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

        for (ii = Istart_crs; ii < Iend_crs; ii++) {
          PetscInt dest_row = ii/cr_bs;
          ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
          for (jj = 0; jj < ncols; jj++) {
            PetscInt    dest_col = idx[jj]/cr_bs;
            PetscScalar v        = 1.0;
            ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
          }
          ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
        }
        ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

        if (llev++ == -1) {
          PetscViewer viewer; char fname[32];
          ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
          ierr = PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
          ierr = MatView(tMat, viewer);CHKERRQ(ierr);
          ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
        }
        ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
        ierr = MatDestroy(&tMat);CHKERRQ(ierr);
      } /* create 'adj' */

      { /* partition: get newproc_idx */
        char            prefix[256];
        const char      *pcpre;
        const PetscInt  *is_idx;
        MatPartitioning mpart;
        IS              proc_is;

        ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
        ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
        ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
        ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
        ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
        ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);

        /* collect IS info */
        ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
        ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
        for (kk = jj = 0 ; kk < nloc_old ; kk++) {
          for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
            newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
          }
        }
        ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
        ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
      }
      ierr = MatDestroy(&adj);CHKERRQ(ierr);

      ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
      ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
    } else { /* simple aggregation of parts -- 'is_eq_newproc' */
      PetscInt targetPE;
      if (new_size==nactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
      ierr = PetscInfo1(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
      targetPE = (rank/rfactor)*expand_factor;
      ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
    } /* end simple 'is_eq_newproc' */

    /*
      Create an index set from the is_eq_newproc index set to indicate the mapping TO the new processes
    */
    ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
    is_eq_num_prim = is_eq_num;
    /*
      Determine how many equations/vertices are assigned to each processor
    */
    ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
    ncrs_eq_new = counts[rank];
    ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
    ncrs_new = ncrs_eq_new/cr_bs;

    ierr = PetscFree(counts);CHKERRQ(ierr);
    /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxiliary data into some complex abstracted thing */
    {
      Vec            src_crd, dest_crd;
      const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
      VecScatter     vecscat;
      PetscScalar    *array;
      IS isscat;
      /* move data (for primal equations only) */
      /* Create a vector to contain the newly ordered element information */
      ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
      ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
      ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
      /*
        There are 'ndata_rows*ndata_cols' data items per node; one can think of these
        vectors as having a block size of that many entries. Note, ISs are expanded
        into equation space by 'cr_bs'.
      */
      ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
      ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
      for (ii=0,jj=0; ii<ncrs; ii++) {
        PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
        for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
      }
      ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
      ierr = PetscFree(tidx);CHKERRQ(ierr);
      /*
        Create a vector to contain the original vertex information for each element
      */
      ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
      for (jj=0; jj<ndata_cols; jj++) {
        const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
        for (ii=0; ii<ncrs; ii++) {
          for (kk=0; kk<ndata_rows; kk++) {
            PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
            PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
            ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
          }
        }
      }
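      /*
        Index-mapping example (illustrative): with ncrs = 2 nodes, ndata_rows = 2 and
        ndata_cols = 3 (so node_data_sz = 6 and stride0 = 4), the column-major entry
        (ii,kk,jj) = (1,0,2) of pc_gamg->data sits at ix = 1*2 + 0 + 2*4 = 10 and is
        placed node-interleaved at jx = 1*6 + 0*3 + 2 = 8 in src_crd.
      */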
      ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
      /*
        Scatter the element vertex information (still in the original vertex ordering)
        to the correct processor
      */
      ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&isscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
      /*
        Put the element vertex data into a new allocation of the gdata->ele
      */
      ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
      ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);

      pc_gamg->data_sz = node_data_sz*ncrs_new;
      strideNew        = ncrs_new*ndata_rows;

      ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
      for (jj=0; jj<ndata_cols; jj++) {
        for (ii=0; ii<ncrs_new; ii++) {
          for (kk=0; kk<ndata_rows; kk++) {
            PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
            pc_gamg->data[ix] = PetscRealPart(array[jx]);
          }
        }
      }
      ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
      ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
    }
    /* move A and P (columns) with new layout */
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
    /*
      Invert for MatCreateSubMatrix
    */
    ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
    ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
    ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
    if (is_eq_num != is_eq_num_prim) {
      ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
    }
    if (Pcolumnperm) {
      ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
      *Pcolumnperm = new_eq_indices;
    }
    ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
    /* 'a_Amat_crs' output */
    {
      Mat mat;
      ierr        = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
      *a_Amat_crs = mat;
    }
    ierr = MatDestroy(&Cmat);CHKERRQ(ierr);

    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
    /* prolongator */
    {
      IS       findices;
      PetscInt Istart,Iend;
      Mat      Pnew;

      ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
      ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
      ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
      ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
      ierr = ISDestroy(&findices);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
      ierr = MatAIJCUSPARSESetGenerateTranspose(Pnew,PETSC_TRUE);CHKERRQ(ierr);
#endif

      ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
      ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);

      /* output - repartitioned */
      *a_P_inout = Pnew;
    }
    ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);

    *a_nactive_proc = new_size; /* output */

    /* pinning on reduced grids is not a bad heuristic, and the optimization gets folded into the process-reduction optimization */
    if (pc_gamg->cpu_pin_coarse_grids) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      static PetscInt llev = 2;
      ierr = PetscInfo1(pc,"Pinning level %D to the CPU\n",llev++);CHKERRQ(ierr);
#endif
      ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
      if (1) { /* HACK: move this to MatBindCPU_MPIAIJXXX; lvec is created in MatSetUpMultiply_MPIAIJ and needs to be pinned as well */
        Mat         A = *a_Amat_crs, P = *a_P_inout;
        PetscMPIInt size;
        ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
        if (size > 1) {
          Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
          ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
          ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
{
  PetscErrorCode ierr;
  const char     *prefix;
  char           addp[32];
  PC_MG          *mg      = (PC_MG*)a_pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
  ierr = PetscInfo1(a_pc,"Square Graph on level %D\n",pc_gamg->current_level+1);CHKERRQ(ierr);
  ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
  ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",pc_gamg->current_level);CHKERRQ(ierr);
  ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
  if ((*Gmat2)->structurally_symmetric) {
    ierr = MatProductSetType(*Gmat2,MATPRODUCT_AB);CHKERRQ(ierr);
  } else {
#if defined(PETSC_HAVE_CUDA)
    ierr = MatAIJCUSPARSESetGenerateTranspose(Gmat1,PETSC_TRUE);CHKERRQ(ierr);
#endif
    ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
  }
  ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
  ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
  ierr = MatProductClear(*Gmat2);CHKERRQ(ierr);
  /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
  (*Gmat2)->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
                    by setting data structures and options.

   Input Parameter:
.  pc - the preconditioner context

*/
PetscErrorCode PCSetUp_GAMG(PC pc)
{
  PetscErrorCode ierr;
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
  Mat            Pmat     = pc->pmat;
  PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
  MPI_Comm       comm;
  PetscMPIInt    rank,size,nactivepe;
  Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
  IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
  PetscLogDouble nnz0=0.,nnztot=0.;
  MatInfo        info;
  PetscBool      is_last = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  if (pc->setupcalled) {
    if (!pc_gamg->reuse_prol || pc->flag == DIFFERENT_NONZERO_PATTERN) {
      /* reset everything */
      ierr = PCReset_MG(pc);CHKERRQ(ierr);
      pc->setupcalled = 0;
    } else {
      PC_MG_Levels **mglevels = mg->levels;
      /* just do Galerkin grids */
      Mat          B,dA,dB;

      if (pc_gamg->Nlevels > 1) {
        PetscInt gl;
        /* currently only handle the case where mat and pmat are the same on coarser levels */
        ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
        /* (re)set to get dirty flag */
        ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);

        for (level=pc_gamg->Nlevels-2,gl=0; level>=0; level--,gl++) {
          MatReuse reuse = MAT_INITIAL_MATRIX;

          /* matrix structure can change from repartitioning or process reduction, but we do not know here whether process reduction happened. Should fix */
          ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
          if (B->product) {
            if (B->product->A == dB && B->product->B == mglevels[level+1]->interpolate) {
              reuse = MAT_REUSE_MATRIX;
            }
          }
          if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr); }
          if (reuse == MAT_REUSE_MATRIX) {
            ierr = PetscInfo1(pc,"RAP after first solve, reuse matrix level %D\n",level);CHKERRQ(ierr);
          } else {
            ierr = PetscInfo1(pc,"RAP after first solve, new matrix level %D\n",level);CHKERRQ(ierr);
          }
          ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
          ierr = MatPtAP(dB,mglevels[level+1]->interpolate,reuse,PETSC_DEFAULT,&B);CHKERRQ(ierr);
          ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
          mglevels[level]->A = B;
          ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
          dB   = B;
        }
      }

      ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }
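
  /*
     Reuse sketch (user-level, hedged): with -pc_gamg_reuse_interpolation true, a
     second solve after the matrix values change (with the same nonzero pattern)
     takes the branch above and only redoes the Galerkin RAP products, rather
     than rebuilding the whole grid hierarchy.
  */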

  if (!pc_gamg->data) {
    if (pc_gamg->orig_data) {
      ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
      ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);

      pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
      pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
      pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;

      ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
      for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
    } else {
      if (!pc_gamg->ops->createdefaultdata) SETERRQ(comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
      ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
    }
  }

  /* cache original data for reuse */
  if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
    ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
    for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
    pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
    pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
  }

  /* get basic dims */
  ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
  ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);

  ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
  nnz0   = info.nz_used;
  nnztot = info.nz_used;
  ierr = PetscInfo6(pc,"level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);

  /* Get A_i and R_i */
  for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
    pc_gamg->current_level = level;
    if (level >= PETSC_MG_MAXLEVELS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
    level1 = level + 1;
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
#if defined(GAMG_STAGES)
    ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
#endif
    { /* construct prolongator */
      Mat              Gmat;
      PetscCoarsenData *agg_lists;
      Mat              Prol11;

      ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
      ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
      ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);

      /* could have failed to create new level */
      if (Prol11) {
        const char *prefix;
        char       addp[32];

        /* get new block size of coarse matrices */
        ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);

        if (pc_gamg->ops->optprolongator) {
          /* smooth */
          ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
        }

        if (pc_gamg->use_aggs_in_asm) {
          PetscInt bs;
          ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
          ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
        }

        ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
        ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
        ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
        ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
        /* Always generate the transpose with CUDA
           Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
#if defined(PETSC_HAVE_CUDA)
        ierr = MatAIJCUSPARSESetGenerateTranspose(Prol11,PETSC_TRUE);CHKERRQ(ierr);
#endif
        ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
        Parr[level1] = Prol11;
      } else Parr[level1] = NULL; /* failed to coarsen */

      ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
      ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
    } /* construct prolongator scope */
    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
    if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
    if (!Parr[level1]) { /* failed to coarsen */
      ierr = PetscInfo1(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
#if defined(GAMG_STAGES)
      ierr = PetscLogStagePop();CHKERRQ(ierr);
#endif
      break;
    }
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
    ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is the next M, a loop test variable */
    if (is_last) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Is last ?");
    if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
    if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
    ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);

    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
    ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is a loop test variable */
    ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
    nnztot += info.nz_used;
    ierr = PetscInfo5(pc,"%D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);

#if defined(GAMG_STAGES)
    ierr = PetscLogStagePop();CHKERRQ(ierr);
#endif
    /* stop if one node or one proc -- could pull back for singular problems */
    if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
      ierr =  PetscInfo2(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
      level++;
      break;
    }
  } /* levels */
  ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);

  ierr = PetscInfo2(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
  pc_gamg->Nlevels = level + 1;
  fine_level       = level;
  ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);

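  /*
     Level indexing note (worked example): PCMG numbers levels with 0 = coarsest,
     while GAMG fills Aarr[]/Parr[] with 0 = finest. With pc_gamg->Nlevels = 4,
     GAMG level 0 (finest) is PCMG level lidx = 3 and GAMG level 3 (coarsest) is
     lidx = 0; the loops below walk the two indices in opposite directions.
  */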
  if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
    PetscErrorCode (*savesetfromoptions[PETSC_MG_MAXLEVELS])(PetscOptionItems*,KSP);

    /* set default smoothers & set operators */
    for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
      KSP smoother;
      PC  subpc;

      ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
      ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);

      ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
      /* set ops */
      ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
      ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);

      /* set defaults */
      ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);

      /* set blocks for ASM smoother that uses the 'aggregates' */
      if (pc_gamg->use_aggs_in_asm) {
        PetscInt sz;
        IS       *iss;

        sz   = nASMBlocksArr[level];
        iss  = ASMLocalIDsArr[level];
        ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
        ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
        ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
        if (!sz) {
          IS       is;
          ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
          ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
          ierr = ISDestroy(&is);CHKERRQ(ierr);
        } else {
          PetscInt kk;
          ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
          for (kk=0; kk<sz; kk++) {
            ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
          }
          ierr = PetscFree(iss);CHKERRQ(ierr);
        }
        ASMLocalIDsArr[level] = NULL;
        nASMBlocksArr[level]  = 0;
      } else {
        ierr = PCSetType(subpc, PCSOR);CHKERRQ(ierr);
      }
    }
    {
      /* coarse grid */
      KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
      Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;

      ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
      ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
      if (!pc_gamg->use_parallel_coarse_grid_solver) {
        ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
        ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
        ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
        ierr = PCSetUp(subpc);CHKERRQ(ierr);
        ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
        if (ii != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
        ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
        ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
        ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
        ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
        ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
      }
    }

    /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
    ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
    ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
    ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);

    /* setup cheby eigen estimates from SA */
    if (pc_gamg->use_sa_esteig==1) {
      for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
        KSP       smoother;
        PetscBool ischeb;

        savesetfromoptions[level] = NULL;
        ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
        ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
        if (ischeb) {
          KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;

          ierr = KSPSetFromOptions(smoother);CHKERRQ(ierr); /* let command line emax override using SA's eigenvalues */
          if (mg->max_eigen_DinvA[level] > 0 && cheb->emax == 0.) {
            PC        subpc;
            PetscBool isjac;
            ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
            ierr = PetscObjectTypeCompare((PetscObject)subpc,PCJACOBI,&isjac);CHKERRQ(ierr);
            if (isjac && pc_gamg->use_sa_esteig==1) {
              PetscReal emax,emin;

              emin = mg->min_eigen_DinvA[level];
              emax = mg->max_eigen_DinvA[level];
              ierr = PetscInfo4(pc,"PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
              cheb->emin_computed = emin;
              cheb->emax_computed = emax;
              ierr = KSPChebyshevSetEigenvalues(smoother, cheb->tform[2]*emin + cheb->tform[3]*emax, cheb->tform[0]*emin + cheb->tform[1]*emax);CHKERRQ(ierr);
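              /* Worked example (assuming the default Chebyshev estimate transform
                 tform = {0,0.1,0,1.1}, an assumption for illustration): the call
                 above targets the interval (0.1*emax, 1.1*emax), i.e. Chebyshev
                 damps the upper part of the spectrum */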

              /* We have set the eigenvalues and consumed the transformation values;
                 prevent flagging the recomputation of the eigenvalues again in PCSetUp_MG
                 below when setfromoptions is called again */
              savesetfromoptions[level] = smoother->ops->setfromoptions;
              smoother->ops->setfromoptions = NULL;
            }
          }
        }
      }
    }

    ierr = PCSetUp_MG(pc);CHKERRQ(ierr);

    /* restore Chebyshev smoother for next calls */
    if (pc_gamg->use_sa_esteig==1) {
      for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
        if (savesetfromoptions[level]) {
          KSP smoother;
          ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
          smoother->ops->setfromoptions = savesetfromoptions[level];
        }
      }
    }

    /* clean up */
    for (level=1; level<pc_gamg->Nlevels; level++) {
      ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
      ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
    }
  } else {
    KSP smoother;

    ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
    ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
    ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
    ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
    ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* ------------------------------------------------------------------------- */
/*
 PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
   that was created with PCCreate_GAMG().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
PetscErrorCode PCDestroy_GAMG(PC pc)
{
  PetscErrorCode ierr;
  PC_MG          *mg     = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
  if (pc_gamg->ops->destroy) {
    ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
  }
  ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
  ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
  ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetProcEqLim - Set the number of equations to aim for per process on the coarse grids via processor reduction.

   Logically Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - the number of equations

   Options Database Key:
.  -pc_gamg_process_eq_limit <limit>

   Notes:
    GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
    that has degrees of freedom

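   Example (a minimal usage sketch; 'pc' is assumed to be a PC of type PCGAMG):
.vb
   ierr = PCGAMGSetProcEqLim(pc,1000);CHKERRQ(ierr); /* aim for ~1000 equations per active process on coarse grids */
.ve
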
   Level: intermediate

.seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
@*/
PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  if (n>0) pc_gamg->min_eq_proc = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetCoarseEqLim - Set the maximum number of equations on the coarsest grid.

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - maximum number of equations to aim for

   Options Database Key:
.  -pc_gamg_coarse_eq_limit <limit>

   Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
     has fewer than 1000 unknowns.

   Level: intermediate

.seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
@*/
PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  if (n>0) pc_gamg->coarse_eq_limit = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - PETSC_TRUE or PETSC_FALSE

   Options Database Key:
.  -pc_gamg_repartition <true,false>

   Notes:
    this will generally improve the load balancing of the work on each level

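   Example (a sketch; this file attaches the 'pc_gamg_' prefix to the inner MatPartitioning, so a partitioner can be selected through it, assuming that partitioner is available in the build):
.vb
   -pc_type gamg -pc_gamg_repartition true -pc_gamg_mat_partitioning_type parmetis
.ve
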
   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->repart = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetEstEigKSPMaxIt - Set the number of KSP iterations used by the eigen estimator (for Chebyshev)

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - number of iterations

   Options Database Key:
.  -pc_gamg_esteig_ksp_max_it <its>

   Notes:
    More iterations generally give a more accurate estimate of the extreme eigenvalues used to set the Chebyshev smoother bounds, at additional setup cost.

   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGSetEstEigKSPMaxIt(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPMaxIt_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetEstEigKSPMaxIt_GAMG(PC pc, PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->esteig_max_it = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetUseSAEstEig - Use the eigen estimate from smoothed aggregation for the Chebyshev smoother

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - PETSC_TRUE to use the smoothed aggregation estimate

   Options Database Key:
.  -pc_gamg_use_sa_esteig <true,false>

   Notes:
    Smoothed aggregation computes eigenvalue estimates of D^{-1}A while smoothing the prolongator; reusing them for the Chebyshev bounds avoids a separate estimation solve.

   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->use_sa_esteig = n ? 1 : 0;
  PetscFunctionReturn(0);
}

/*@C
   PCGAMGSetEstEigKSPType - Set the type of KSP used by the eigen estimator (for Chebyshev)

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  t - KSP type, e.g. KSPCG

   Options Database Key:
.  -pc_gamg_esteig_ksp_type <type>

   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGSetEstEigKSPType(PC pc, char t[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPType_C",(PC,char[]),(pc,t));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetEstEigKSPType_GAMG(PC pc, char t[])
{
  PetscErrorCode ierr;
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  ierr = PetscStrcpy(pc_gamg->esteig_type,t);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetEigenvalues - Set the min/max eigenvalue estimates used by GAMG, rather than having them computed

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
.  emax - max eigenvalue
-  emin - min eigenvalue

   Options Database Key:
.  -pc_gamg_eigenvalues

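   Notes:
    A minimal sketch (illustrative values); the two bounds must be of the same sign with emax > emin,
.vb
   ierr = PCGAMGSetEigenvalues(pc,4.0,0.25);CHKERRQ(ierr); /* emax = 4.0, emin = 0.25 */
.ve
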
   Level: intermediate

.seealso: PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPType()
@*/
PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
{
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  if (emax <= emin) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
  if (emax*emin <= 0.0) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
  pc_gamg->emax = emax;
  pc_gamg->emin = emin;

  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetReuseInterpolation - Reuse the prolongation when rebuilding the algebraic multigrid preconditioner

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  n - PETSC_TRUE or PETSC_FALSE

   Options Database Key:
.  -pc_gamg_reuse_interpolation <true,false>

   Level: intermediate

   Notes:
    this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
    the preconditioner to be rebuilt much more quickly.

.seealso: ()
@*/
PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->reuse_prol = n;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not

   Options Database Key:
.  -pc_gamg_asm_use_agg

   Level: intermediate

.seealso: ()
@*/
PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->use_aggs_in_asm = flg;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  flg - PETSC_TRUE to not force coarse grid onto one processor

   Options Database Key:
.  -pc_gamg_use_parallel_coarse_grid_solver

   Level: intermediate

.seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
@*/
PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->use_parallel_coarse_grid_solver = flg;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetCpuPinCoarseGrids - pin the coarse (reduced) grids to the CPU

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  flg - PETSC_TRUE to pin coarse grids to the CPU

   Options Database Key:
.  -pc_gamg_cpu_pin_coarse_grids

   Level: intermediate

.seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
@*/
PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->cpu_pin_coarse_grids = flg;
  PetscFunctionReturn(0);
}

/*@
   PCGAMGSetCoarseGridLayoutType - place the reduced grids on processes in natural order (compact) or spread them out (spread)

   Collective on PC

   Input Parameters:
+  pc - the preconditioner context
-  flg - layout type

   Options Database Key:
.  -pc_gamg_coarse_grid_layout_type <compact,spread>

   Level: intermediate

.seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
@*/
PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->layout_type = flg;
  PetscFunctionReturn(0);
}
1324 
1325 /*@
1326    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1327 
1328    Not collective on PC
1329 
1330    Input Parameters:
1331 +  pc - the preconditioner
1332 -  n - the maximum number of levels to use
1333 
1334    Options Database Key:
1335 .  -pc_mg_levels <n>
1336 
1337    Level: intermediate
1338 
1339 .seealso: PCGAMGSetThreshold(), PCGAMGSetThresholdScale()
1340 @*/
1341 PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
1342 {
1343   PetscErrorCode ierr;
1344 
1345   PetscFunctionBegin;
1346   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1347   ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1352 {
1353   PC_MG   *mg      = (PC_MG*)pc->data;
1354   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1355 
1356   PetscFunctionBegin;
1357   pc_gamg->Nlevels = n;
1358   PetscFunctionReturn(0);
1359 }
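
/*
   Usage sketch (illustrative): cap the hierarchy at 4 levels; coarsening may
   still stop earlier once the coarse grid satisfies -pc_gamg_coarse_eq_limit:

     ierr = PCGAMGSetNlevels(pc,4);CHKERRQ(ierr);

   or on the command line: -pc_mg_levels 4
*/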
1360 
1361 /*@
1362    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1363 
1364    Not collective on PC
1365 
1366    Input Parameters:
1367 +  pc - the preconditioner context
1368 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1369 -  n - number of threshold values provided in array
1370 
1371    Options Database Key:
1372 .  -pc_gamg_threshold <threshold>
1373 
1374    Notes:
1375     Increasing the threshold decreases the rate of coarsening. Conversely, reducing the threshold increases the rate of coarsening (aggressive coarsening), which reduces the complexity of the coarse grids but generally results in slower solver convergence rates. Reducing the coarse grid complexity also considerably reduces the cost of the Galerkin coarse grid construction.
1376     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, thus reducing the coupling in the graph and producing a different (perhaps better) coarse set of points.
1377 
1378     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1379     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1380     If n is greater than the total number of levels, the excess entries in threshold will not be used.
1381 
1382    Level: intermediate
1383 
1384 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1385 @*/
1386 PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
1387 {
1388   PetscErrorCode ierr;
1389 
1390   PetscFunctionBegin;
1391   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1392   if (n) PetscValidRealPointer(v,2);
1393   ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1398 {
1399   PC_MG   *mg      = (PC_MG*)pc->data;
1400   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1401   PetscInt i;
1402   PetscFunctionBegin;
1403   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1404   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;
1405   PetscFunctionReturn(0);
1406 }
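
/*
   Usage sketch (illustrative values): set dropping thresholds for the two finest
   levels; the remaining levels are filled by repeatedly multiplying the last
   given value by the threshold scale, so PCGAMGSetThresholdScale() is called
   first. Here level 2 gets 0.005, level 3 gets 0.0025, and so on.

     PetscReal th[2] = {0.02,0.01};
     ierr = PCGAMGSetThresholdScale(pc,0.5);CHKERRQ(ierr);
     ierr = PCGAMGSetThreshold(pc,th,2);CHKERRQ(ierr);
*/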
1407 
1408 /*@
1409    PCGAMGSetRankReductionFactors - Set a manual schedule for process reduction on coarse grids
1410 
1411    Collective on PC
1412 
1413    Input Parameters:
1414 +  pc - the preconditioner context
1415 .  v - array of reduction factors; 0 for the first value forces a reduction to one process/device on the first level when running with CUDA
1416 -  n - number of values provided in array
1417 
1418    Options Database Key:
1419 .  -pc_gamg_rank_reduction_factors <factors>
1420 
1421    Level: intermediate
1422 
1423 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1424 @*/
1425 PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
1426 {
1427   PetscErrorCode ierr;
1428 
1429   PetscFunctionBegin;
1430   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1431   if (n) PetscValidIntPointer(v,2);
1432   ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
1437 {
1438   PC_MG   *mg      = (PC_MG*)pc->data;
1439   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1440   PetscInt i;
1441   PetscFunctionBegin;
1442   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
1443   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* -1 means no manual factor: use the internal reduction heuristics */
1444   PetscFunctionReturn(0);
1445 }
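
/*
   Usage sketch (illustrative values; reading the factors as divisors of the
   active process count is this sketch's assumption): override the internal
   heuristics for the first two coarsenings and fall back to the heuristics
   (-1) on the remaining levels:

     PetscInt rf[2] = {8,4};
     ierr = PCGAMGSetRankReductionFactors(pc,rf,2);CHKERRQ(ierr);

   or on the command line: -pc_gamg_rank_reduction_factors 8,4
*/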
1446 
1447 /*@
1448    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1449 
1450    Not collective on PC
1451 
1452    Input Parameters:
1453 +  pc - the preconditioner context
1454 -  v - the threshold value reduction, usually < 1.0
1455 
1456    Options Database Key:
1457 .  -pc_gamg_threshold_scale <v>
1458 
1459    Notes:
1460    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1461    This scaling is applied for each subsequent coarsening; PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold() for it to take effect.
1462 
1463    Level: advanced
1464 
1465 .seealso: PCGAMGSetThreshold()
1466 @*/
1467 PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
1468 {
1469   PetscErrorCode ierr;
1470 
1471   PetscFunctionBegin;
1472   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1473   ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1478 {
1479   PC_MG   *mg      = (PC_MG*)pc->data;
1480   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1481   PetscFunctionBegin;
1482   pc_gamg->threshold_scale = v;
1483   PetscFunctionReturn(0);
1484 }
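
/*
   Worked example (illustrative numbers): with

     -pc_gamg_threshold 0.01 -pc_gamg_threshold_scale 0.5

   only the finest-level threshold is given, so the fill loop in
   PCSetFromOptions_GAMG() below produces effective thresholds of
   0.01, 0.005, 0.0025, ... on successive levels.
*/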
1485 
1486 /*@C
1487    PCGAMGSetType - Set the type of algebraic multigrid method to use
1488 
1489    Collective on PC
1490 
1491    Input Parameters:
1492 +  pc - the preconditioner context
1493 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1494 
1495    Options Database Key:
1496 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1497 
1498    Level: intermediate
1499 
1500 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1501 @*/
1502 PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
1503 {
1504   PetscErrorCode ierr;
1505 
1506   PetscFunctionBegin;
1507   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1508   ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 /*@C
1513    PCGAMGGetType - Get the type of algebraic multigrid method being used
1514 
1515    Collective on PC
1516 
1517    Input Parameter:
1518 .  pc - the preconditioner context
1519 
1520    Output Parameter:
1521 .  type - the type of algorithm used
1522 
1523    Level: intermediate
1524 
1525 .seealso: PCGAMGSetType(), PCGAMGType
1526 @*/
1527 PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
1528 {
1529   PetscErrorCode ierr;
1530 
1531   PetscFunctionBegin;
1532   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1533   ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1538 {
1539   PC_MG          *mg      = (PC_MG*)pc->data;
1540   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1541 
1542   PetscFunctionBegin;
1543   *type = pc_gamg->type;
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
1548 {
1549   PetscErrorCode ierr,(*r)(PC);
1550   PC_MG          *mg      = (PC_MG*)pc->data;
1551   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1552 
1553   PetscFunctionBegin;
1554   pc_gamg->type = type;
1555   ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
1556   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
1557   if (pc_gamg->ops->destroy) {
1558     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
1559     ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
1560     pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1561     /* cleaning up common data in pc_gamg - this should disappear someday */
1562     pc_gamg->data_cell_cols = 0;
1563     pc_gamg->data_cell_rows = 0;
1564     pc_gamg->orig_data_cell_cols = 0;
1565     pc_gamg->orig_data_cell_rows = 0;
1566     ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
1567     pc_gamg->data_sz = 0;
1568   }
1569   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
1570   ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
1571   ierr = (*r)(pc);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
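
/*
   Usage sketch (illustrative): select the aggregation-based method explicitly
   and read the selection back:

     PCGAMGType t;
     ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
     ierr = PCGAMGGetType(pc,&t);CHKERRQ(ierr);

   or on the command line: -pc_gamg_type agg
*/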
1574 
1575 /* -------------------------------------------------------------------------- */
1576 /*
1577    PCMGGetGridComplexity - compute coarse grid complexity of MG hierarchy
1578 
1579    Input Parameter:
1580 .  pc - the preconditioner context
1581 
1582    Output Parameter:
1583 .  gc - grid complexity = sum_i(nnz_i) / nnz_0
1584 
1585    Level: advanced
1586 */
1587 static PetscErrorCode PCMGGetGridComplexity(PC pc, PetscReal *gc)
1588 {
1589   PetscErrorCode ierr;
1590   PC_MG          *mg      = (PC_MG*)pc->data;
1591   PC_MG_Levels   **mglevels = mg->levels;
1592   PetscInt       lev;
1593   PetscLogDouble nnz0 = 0, sgc = 0;
1594   MatInfo        info;
1595 
1596   PetscFunctionBegin;
1597   if (!pc->setupcalled) {
1598     *gc = 0;
1599     PetscFunctionReturn(0);
1600   }
1601   if (!mg->nlevels) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MG has no levels");
1602   for (lev=0; lev<mg->nlevels; lev++) {
1603     Mat dB;
1604     ierr = KSPGetOperators(mglevels[lev]->smoothd,NULL,&dB);CHKERRQ(ierr);
1605     ierr = MatGetInfo(dB,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
1606     sgc += info.nz_used;
1607     if (lev==mg->nlevels-1) nnz0 = info.nz_used;
1608   }
1609   if (nnz0 > 0) *gc = (PetscReal)(sgc/nnz0);
1610   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of nonzeros on finest level is not available");
1611   PetscFunctionReturn(0);
1612 }
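
/*
   Worked example for the formula above (illustrative numbers): for a 3-level
   hierarchy whose operators have 1000 (finest), 150, and 25 nonzeros,
   gc = (1000 + 150 + 25) / 1000 = 1.175.
*/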
1613 
1614 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1615 {
1616   PetscErrorCode ierr; PetscInt i;
1617   PC_MG          *mg      = (PC_MG*)pc->data;
1618   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1619   PetscReal       gc=0;
1620   PetscFunctionBegin;
1621   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1622   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1623   for (i=0;i<mg->nlevels; i++) {
1624     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1625   }
1626   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1627   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1628   if (pc_gamg->use_aggs_in_asm) {
1629     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1630   }
1631   if (pc_gamg->use_parallel_coarse_grid_solver) {
1632     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1633   }
1634 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1635   if (pc_gamg->cpu_pin_coarse_grids) {
1636     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1637   }
1638 #endif
1639   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1640   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1641   /* } else { */
1642   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1643   /* } */
1644   if (pc_gamg->ops->view) {
1645     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1646   }
1647   ierr = PCMGGetGridComplexity(pc,&gc);CHKERRQ(ierr);
1648   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g\n",(double)gc);CHKERRQ(ierr);
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1653 {
1654   PetscErrorCode ierr;
1655   PC_MG          *mg      = (PC_MG*)pc->data;
1656   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1657   PetscBool      flag,f2;
1658   MPI_Comm       comm;
1659   char           prefix[256],tname[32];
1660   PetscInt       i,n;
1661   const char     *pcpre;
1662   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1663   PetscFunctionBegin;
1664   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1665   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1666   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1667   if (flag) {
1668     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1669   }
1670   ierr = PetscOptionsFList("-pc_gamg_esteig_ksp_type","Krylov method for eigen estimator","PCGAMGSetEstEigKSPType",KSPList,pc_gamg->esteig_type,tname,sizeof(tname),&flag);CHKERRQ(ierr);
1671   if (flag) {
1672     ierr = PCGAMGSetEstEigKSPType(pc,tname);CHKERRQ(ierr);
1673   }
1674   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1675   f2 = PETSC_TRUE;
1676   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use the eigen estimate from smoothed aggregation for the smoother","PCGAMGSetUseSAEstEig",f2,&f2,&flag);CHKERRQ(ierr);
1677   if (flag) pc_gamg->use_sa_esteig = f2 ? 1 : 0;
1678   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1679   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1680   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1681   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1682   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1683   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1684   ierr = PetscOptionsInt("-pc_gamg_esteig_ksp_max_it","Number of iterations of eigen estimator","PCGAMGSetEstEigKSPMaxIt",pc_gamg->esteig_max_it,&pc_gamg->esteig_max_it,NULL);CHKERRQ(ierr);
1685   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1686   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1687   n = PETSC_MG_MAXLEVELS;
1688   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1689   if (!flag || n < PETSC_MG_MAXLEVELS) {
1690     if (!flag) n = 1;
1691     i = n;
1692     for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;
1693   }
1694   n = PETSC_MG_MAXLEVELS;
1695   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (0 for first reduction puts one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1696   if (!flag) i = 0;
1697   else i = n;
1698   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* -1 means use the internal reduction heuristics */
1699   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1700   {
1701     PetscReal eminmax[2] = {0., 0.};
1702     n = 2;
1703     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1704     if (flag) {
1705       if (n != 2) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1706       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr);
1707     }
1708   }
1709   /* set options for subtype */
1710   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1711 
1712   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1713   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1714   ierr = PetscOptionsTail();CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
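
/*
   Illustrative command line combining the options above (the values are
   examples, not recommendations):

     -pc_type gamg -pc_gamg_type agg -pc_gamg_threshold 0.01
     -pc_gamg_threshold_scale 0.5 -pc_gamg_process_eq_limit 200
     -pc_gamg_coarse_eq_limit 100 -pc_gamg_repartition true -pc_mg_levels 5
*/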
1717 
1718 /* -------------------------------------------------------------------------- */
1719 /*MC
1720      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1721 
1722    Options Database Keys:
1723 +   -pc_gamg_type <type> - one of agg, geo, or classical
1724 .   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
1725 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
1726 .   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1727 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1728                                         equations on each process that has degrees of freedom
1729 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1730 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1731 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1732 
1733    Options Database Keys for default Aggregation:
1734 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1735 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1736 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1737 
1738    Multigrid options:
1739 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1740 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
1741 .   -pc_mg_type <multiplicative> - (one of) additive multiplicative full kaskade
1742 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1743 
1744 
1745   Notes:
1746     In order to obtain good performance for PCGAMG for vector-valued problems you must
1747        Call MatSetBlockSize() to indicate the number of degrees of freedom per grid point
1748        Call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator
1749        See the Users Manual Chapter 4 for more details
1750 
1751   Level: intermediate
1752 
1753 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1754            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetEstEigKSPType()
1755 M*/
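
/*
   A minimal calling-sequence sketch (not a complete program; the Mat A and
   Vec b,x are assumed to be assembled elsewhere, with MatSetBlockSize() and
   MatSetNearNullSpace() already called for vector-valued problems):

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCGAMG);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
*/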
1756 
1757 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1758 {
1759   PetscErrorCode ierr; PetscInt i;
1760   PC_GAMG        *pc_gamg;
1761   PC_MG          *mg;
1762 
1763   PetscFunctionBegin;
1764    /* register AMG type */
1765   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1766 
1767   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1768   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1769   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1770 
1771   /* create a supporting struct and attach it to pc */
1772   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1773   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1774   mg           = (PC_MG*)pc->data;
1775   mg->innerctx = pc_gamg;
1776 
1777   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1778 
1779   /* these should be in subctx but repartitioning needs simple arrays */
1780   pc_gamg->data_sz = 0;
1781   pc_gamg->data    = NULL;
1782 
1783   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1784   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1785   pc->ops->setup          = PCSetUp_GAMG;
1786   pc->ops->reset          = PCReset_GAMG;
1787   pc->ops->destroy        = PCDestroy_GAMG;
1788   mg->view                = PCView_GAMG;
1789 
1790   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1791   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1792   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1793   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1794   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1795   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPType_C",PCGAMGSetEstEigKSPType_GAMG);CHKERRQ(ierr);
1796   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPMaxIt_C",PCGAMGSetEstEigKSPMaxIt_GAMG);CHKERRQ(ierr);
1797   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1798   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1799   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1801   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1803   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1804   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1806   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1807   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1808   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1810   pc_gamg->repart           = PETSC_FALSE;
1811   pc_gamg->reuse_prol       = PETSC_FALSE;
1812   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1813   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1814   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1815   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1816   pc_gamg->min_eq_proc      = 50;
1817   pc_gamg->coarse_eq_limit  = 50;
1818   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1819   pc_gamg->threshold_scale = 1.;
1820   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1821   pc_gamg->current_level    = 0; /* don't need to init really */
1822   ierr = PetscStrcpy(pc_gamg->esteig_type,NULL);CHKERRQ(ierr);
1823   pc_gamg->esteig_max_it    = 10;
1824   pc_gamg->use_sa_esteig    = -1;
1825   pc_gamg->emin             = 0;
1826   pc_gamg->emax             = 0;
1827 
1828   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1829 
1830   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1831   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 /*@C
1836  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1837     from PCInitializePackage().
1838 
1839  Level: developer
1840 
1841  .seealso: PetscInitialize()
1842 @*/
1843 PetscErrorCode PCGAMGInitializePackage(void)
1844 {
1845   PetscErrorCode ierr;
1846   PetscInt       l;
1847 
1848   PetscFunctionBegin;
1849   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1850   PCGAMGPackageInitialized = PETSC_TRUE;
1851   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1852   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1853   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1854   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1855 
1856   /* general events */
1857   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", PC_CLASSID, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1858   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1859   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1860   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1861   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1862   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1863   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1864 
1865   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1866   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1867   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1868   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1869   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1870   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1871   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1872   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1873   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1874   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1875   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1876   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1877   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1878   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1879   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1880   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1881   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1882   for (l=0;l<PETSC_MG_MAXLEVELS;l++) {
1883     char ename[32];
1884 
1885     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Squ l%02d",l);CHKERRQ(ierr);
1886     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][0]);CHKERRQ(ierr);
1887     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Gal l%02d",l);CHKERRQ(ierr);
1888     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][1]);CHKERRQ(ierr);
1889     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Opt l%02d",l);CHKERRQ(ierr);
1890     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][2]);CHKERRQ(ierr);
1891   }
1892   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1893   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1894   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1895   /* create timer stages */
1896 #if defined(GAMG_STAGES)
1897   {
1898     char     str[32];
1899     PetscInt lidx;
1900     sprintf(str,"MG Level %d (finest)",0);
1901     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1902     for (lidx=1; lidx<9; lidx++) {
1903       sprintf(str,"MG Level %d",(int)lidx);
1904       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1905     }
1906   }
1907 #endif
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 /*@C
1912  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1913     called from PetscFinalize() automatically.
1914 
1915  Level: developer
1916 
1917  .seealso: PetscFinalize()
1918 @*/
1919 PetscErrorCode PCGAMGFinalizePackage(void)
1920 {
1921   PetscErrorCode ierr;
1922 
1923   PetscFunctionBegin;
1924   PCGAMGPackageInitialized = PETSC_FALSE;
1925   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 /*@C
1930  PCGAMGRegister - Register a PCGAMG implementation.
1931 
1932  Input Parameters:
1933  + type - string that will be used as the name of the GAMG type.
1934  - create - function for creating the GAMG context.
1935 
1936   Level: advanced
1937 
1938  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1939 @*/
1940 PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
1941 {
1942   PetscErrorCode ierr;
1943 
1944   PetscFunctionBegin;
1945   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1946   ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
1947   PetscFunctionReturn(0);
1948 }
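
/*
   Registration sketch ("mygamg" and PCCreateGAMG_My are hypothetical names used
   only for illustration): a creation routine fills pc_gamg->ops for the new
   method, after which the type can be selected like the built-in ones:

     static PetscErrorCode PCCreateGAMG_My(PC pc)
     {
       PetscFunctionBegin;
       ... set pc_gamg->ops entries for the new method ...
       PetscFunctionReturn(0);
     }

     ierr = PCGAMGRegister("mygamg",PCCreateGAMG_My);CHKERRQ(ierr);
     ierr = PCGAMGSetType(pc,"mygamg");CHKERRQ(ierr);
*/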
1949 
1950