xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision b4da3a1b2c3c2e292c3168fd4e6af360c056ffcc)
1 /*
2  GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /* Hack to access same_local_solves */
7 #include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/
8 
9 #if defined(PETSC_HAVE_CUDA)
10   #include <cuda_runtime.h>
11 #endif
12 
13 #if defined(PETSC_HAVE_HIP)
14   #include <hip/hip_runtime.h>
15 #endif
16 
17 #if defined PETSC_GAMG_USE_LOG
18 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
19 #endif
20 
21 #if defined PETSC_USE_LOG
22 PetscLogEvent PC_GAMGGraph_AGG;
23 PetscLogEvent PC_GAMGGraph_GEO;
24 PetscLogEvent PC_GAMGCoarsen_AGG;
25 PetscLogEvent PC_GAMGCoarsen_GEO;
26 PetscLogEvent PC_GAMGProlongator_AGG;
27 PetscLogEvent PC_GAMGProlongator_GEO;
28 PetscLogEvent PC_GAMGOptProlongator_AGG;
29 #endif
30 
31 /* #define GAMG_STAGES */
32 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
33 static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
34 #endif
35 
36 static PetscFunctionList GAMGList = NULL;
37 static PetscBool PCGAMGPackageInitialized;
38 
39 /* ----------------------------------------------------------------------------- */
40 PetscErrorCode PCReset_GAMG(PC pc)
41 {
42   PetscErrorCode ierr;
     PetscInt       level;
43   PC_MG          *mg      = (PC_MG*)pc->data;
44   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
45 
46   PetscFunctionBegin;
47   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
48   pc_gamg->data_sz = 0;
49   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
50   for (level = 0; level < PETSC_MG_MAXLEVELS ; level++) {
51     mg->min_eigen_DinvA[level] = 0;
52     mg->max_eigen_DinvA[level] = 0;
53   }
54   pc_gamg->emin = 0;
55   pc_gamg->emax = 0;
56   PetscFunctionReturn(0);
57 }
58 
59 /* -------------------------------------------------------------------------- */
60 /*
61    PCGAMGCreateLevel_GAMG: create the coarse operator with RAP; repartition and/or reduce the
62      number of active processes.
63 
64    Input Parameters:
65    . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
66           'pc_gamg->data_sz' are changed via repartitioning/reduction.
67    . Amat_fine - matrix on this fine (k) level
68    . cr_bs - coarse block size
69    Input/Output Parameters:
70    . a_P_inout - prolongation operator to the next level (k-->k-1)
71    . a_nactive_proc - number of active processes
72    Output Parameter:
73    . a_Amat_crs - coarse matrix that is created (k-1)
74 */
75 
76 static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
77 {
78   PetscErrorCode  ierr;
79   PC_MG           *mg         = (PC_MG*)pc->data;
80   PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
81   Mat             Cmat,Pold=*a_P_inout;
82   MPI_Comm        comm;
83   PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
84   PetscInt        ncrs_eq,ncrs,f_bs;
85 
86   PetscFunctionBegin;
87   ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
88   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
89   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
90   ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
91   ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
92 
93   if (Pcolumnperm) *Pcolumnperm = NULL;
94 
95   /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
96   ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
97   if (pc_gamg->data_cell_rows>0) {
98     ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
99   } else {
100     PetscInt  bs;
101     ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
102     ncrs = ncrs_eq/bs;
103   }
104   /* get number of PEs to make active 'new_size', reduce, can be any integer 1-P */
105   if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* a reduction factor of 0 turns on reduction to one process per device; should also be done for HIP, etc. */
106 #if defined(PETSC_HAVE_CUDA)
107     PetscShmComm pshmcomm;
108     PetscMPIInt  locrank;
109     MPI_Comm     loccomm;
110     PetscInt     s_nnodes,r_nnodes, new_new_size;
111     cudaError_t  cerr;
112     int          devCount;
113     ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
114     ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
115     ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRMPI(ierr);
116     s_nnodes = !locrank;
117     ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
118     if (size%r_nnodes) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of nodes: np=%D nnodes=%D",size,r_nnodes);
119     devCount = 0;
120     cerr = cudaGetDeviceCount(&devCount);
121     cudaGetLastError(); /* Reset the last error */
122     if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
123       new_new_size = r_nnodes * devCount;
124       new_size = new_new_size;
125       ierr = PetscInfo5(pc,"Fine grid with Cuda. %D nodes. Change new active set size %D --> %D (devCount=%D #nodes=%D)\n",r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
126     } else {
127       ierr = PetscInfo(pc,"With Cuda but no device. Use heuristics.\n");CHKERRQ(ierr);
128       goto HEURISTIC;
129     }
130 #else
131     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
132 #endif
133   } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
134     if (nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of active process %D wrt reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
135     new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
136     ierr = PetscInfo3(pc,"Manually setting reduction to %D active processes (%D/%D)\n",new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
137   } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
138     new_size = 1;
139     ierr = PetscInfo1(pc,"Force coarsest grid reduction to %D active processes\n",new_size);CHKERRQ(ierr);
140   } else {
141     PetscInt ncrs_eq_glob;
142 #if defined(PETSC_HAVE_CUDA)
143     HEURISTIC:
144 #endif
145     ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
146     new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
147     if (!new_size) new_size = 1; /* not likely, but possible */
148     else if (new_size >= nactive) new_size = nactive; /* no change, rare */
149     ierr = PetscInfo2(pc,"Coarse grid reduction from %D to %D active processes\n",nactive,new_size);CHKERRQ(ierr);
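    /* Worked example (illustrative numbers): with the default -pc_gamg_process_eq_limit of 50
       (pc_gamg->min_eq_proc) and a coarse grid with 1200 global equations, this heuristic gives
       new_size = (PetscMPIInt)(1200./50. + 0.5) = 24, which the two tests above clamp to the
       range [1,nactive]. */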
150   }
151   if (new_size==nactive) {
152     *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
153     if (new_size < size) {
154       /* odd case where multiple coarse grids are on one processor or no coarsening ... */
155       ierr = PetscInfo1(pc,"reduced grid using same number of processors (%D) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
156       if (pc_gamg->cpu_pin_coarse_grids) {
157         ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
158         ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
159       }
160     }
161     /* we know that the grid structure can be reused in MatPtAP */
162   } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
163     PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
164     IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
165     nloc_old = ncrs_eq/cr_bs;
166     if (ncrs_eq % cr_bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
167     /* get new_size and rfactor */
168     if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
169       /* find factor */
170       if (new_size == 1) rfactor = size; /* don't modify */
171       else {
172         PetscReal best_fact = 0.;
173         jj = -1;
174         for (kk = 1 ; kk <= size ; kk++) {
175           if (!(size%kk)) { /* a candidate */
176             PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
177             if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
178             if (fact > best_fact) {
179               best_fact = fact; jj = kk;
180             }
181           }
182         }
183         if (jj != -1) rfactor = jj;
184         else rfactor = 1; /* a prime */
185         if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
186         else expand_factor = rfactor;
187       }
188       new_size = size/rfactor; /* make new size one that is factor */
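      /* Worked example (illustrative numbers): size=24 with a requested new_size=5. The divisors
         of 24 give candidate active-set sizes 24,12,8,6,4,3,2,1; the ratio test prefers 6
         (fact=5/6) over 4 (fact=4/5), so rfactor=4 and the adjusted new_size becomes 24/4=6. */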
189       if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
190         *a_Amat_crs = Cmat;
191         ierr = PetscInfo2(pc,"Finding factorable processor set stopped reduction: new_size=%D, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
192         PetscFunctionReturn(0);
193       }
194     }
195 #if defined PETSC_GAMG_USE_LOG
196     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
197 #endif
198     /* make 'is_eq_newproc' */
199     ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
200     if (pc_gamg->repart) {
201       /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
202       Mat      adj;
203       ierr = PetscInfo4(pc,"Repartition: size (active): %D --> %D, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
204       /* get 'adj' */
205       if (cr_bs == 1) {
206         ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
207       } else {
208         /* make a scalar matrix to partition (no Stokes here) */
209         Mat               tMat;
210         PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
211         const PetscScalar *vals;
212         const PetscInt    *idx;
213         PetscInt          *d_nnz, *o_nnz, M, N;
214         static PetscInt   llev = 0; /* ugly but just used for debugging */
215         MatType           mtype;
216 
217         ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
218         ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
219         ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
220         for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
221           ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
222           d_nnz[jj] = ncols/cr_bs;
223           o_nnz[jj] = ncols/cr_bs;
224           ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
225           if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
226           if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
227         }
228 
229         ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
230         ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
231         ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
232         ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
233         ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
234         ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
235         ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
236 
237         for (ii = Istart_crs; ii < Iend_crs; ii++) {
238           PetscInt dest_row = ii/cr_bs;
239           ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
240           for (jj = 0; jj < ncols; jj++) {
241             PetscInt    dest_col = idx[jj]/cr_bs;
242             PetscScalar v        = 1.0;
243             ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
244           }
245           ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
246         }
247         ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248         ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
249 
250         if (llev++ == -1) {
251           PetscViewer viewer; char fname[32];
252           ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
253           ierr = PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
254           ierr = MatView(tMat, viewer);CHKERRQ(ierr);
255           ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
256         }
257         ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
258         ierr = MatDestroy(&tMat);CHKERRQ(ierr);
259       } /* create 'adj' */
260 
261       { /* partition: get newproc_idx */
262         char            prefix[256];
263         const char      *pcpre;
264         const PetscInt  *is_idx;
265         MatPartitioning mpart;
266         IS              proc_is;
267 
268         ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
269         ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
270         ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
271         ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
272         ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
273         ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
274         ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
275         ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
276         ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
277 
278         /* collect IS info */
279         ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
280         ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
281         for (kk = jj = 0 ; kk < nloc_old ; kk++) {
282           for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
283             newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
284           }
285         }
286         ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
287         ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
288       }
289       ierr = MatDestroy(&adj);CHKERRQ(ierr);
290 
291       ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
292       ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
293     } else { /* simple aggregation of parts -- 'is_eq_newproc' */
294       PetscInt targetPE;
295       if (new_size==nactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
296       ierr = PetscInfo1(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
297       targetPE = (rank/rfactor)*expand_factor;
298       ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
299     } /* end simple 'is_eq_newproc' */
300 
301     /*
302       Create an index set from the is_eq_newproc index set to indicate the mapping TO the new processes
303     */
304     ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
305     is_eq_num_prim = is_eq_num;
306     /*
307       Determine how many equations/vertices are assigned to each processor
308     */
309     ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
310     ncrs_eq_new = counts[rank];
311     ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
312     ncrs_new = ncrs_eq_new/cr_bs;
313 
314     ierr = PetscFree(counts);CHKERRQ(ierr);
315     /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxiliary data into some complex abstracted thing */
316     {
317       Vec            src_crd, dest_crd;
318       const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
319       VecScatter     vecscat;
320       PetscScalar    *array;
321       IS isscat;
322       /* move data (for primal equations only) */
323       /* Create a vector to contain the newly ordered element information */
324       ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
325       ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
326       ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
327       /*
328         There are 'ndata_rows*ndata_cols' data items per node (one can think of the vectors as
329         having a block size of 'node_data_sz').  Note, ISs are expanded into equation space by 'cr_bs'.
330       */
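      /* Layout sketch (illustrative, assuming ndata_rows=2 and ndata_cols=3, i.e. 2 dofs per node
         and 3 data vectors): pc_gamg->data stores each of the 3 columns contiguously with stride
         ncrs*ndata_rows, while the scatter vectors interleave by node -- node ii owns
         node_data_sz=6 consecutive entries ordered row-major as (row,col) =
         (0,0),(0,1),(0,2),(1,0),(1,1),(1,2). The ix/jx index arithmetic below converts between
         these two orderings. */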
331       ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
332       ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
333       for (ii=0,jj=0; ii<ncrs; ii++) {
334         PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
335         for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
336       }
337       ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
338       ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
339       ierr = PetscFree(tidx);CHKERRQ(ierr);
340       /*
341         Create a vector to contain the original vertex information for each element
342       */
343       ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
344       for (jj=0; jj<ndata_cols; jj++) {
345         const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
346         for (ii=0; ii<ncrs; ii++) {
347           for (kk=0; kk<ndata_rows; kk++) {
348             PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
349             PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
350             ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
351           }
352         }
353       }
354       ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
355       ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
356       /*
357         Scatter the element vertex information (still in the original vertex ordering)
358         to the correct processor
359       */
360       ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
361       ierr = ISDestroy(&isscat);CHKERRQ(ierr);
362       ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
363       ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
364       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
365       ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
366       /*
367         Put the element vertex data into a new allocation of the gdata->ele
368       */
369       ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
370       ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);
371 
372       pc_gamg->data_sz = node_data_sz*ncrs_new;
373       strideNew        = ncrs_new*ndata_rows;
374 
375       ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
376       for (jj=0; jj<ndata_cols; jj++) {
377         for (ii=0; ii<ncrs_new; ii++) {
378           for (kk=0; kk<ndata_rows; kk++) {
379             PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
380             pc_gamg->data[ix] = PetscRealPart(array[jx]);
381           }
382         }
383       }
384       ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
385       ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
386     }
387     /* move A and P (columns) with new layout */
388 #if defined PETSC_GAMG_USE_LOG
389     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
390 #endif
391     /*
392       Invert for MatCreateSubMatrix
393     */
394     ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
395     ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
396     ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
397     if (is_eq_num != is_eq_num_prim) {
398       ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
399     }
400     if (Pcolumnperm) {
401       ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
402       *Pcolumnperm = new_eq_indices;
403     }
404     ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
405 #if defined PETSC_GAMG_USE_LOG
406     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
407     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
408 #endif
409     /* 'a_Amat_crs' output */
410     {
411       Mat mat;
412       ierr        = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
413       *a_Amat_crs = mat;
414     }
415     ierr = MatDestroy(&Cmat);CHKERRQ(ierr);
416 
417 #if defined PETSC_GAMG_USE_LOG
418     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
419 #endif
420     /* prolongator */
421     {
422       IS       findices;
423       PetscInt Istart,Iend;
424       Mat      Pnew;
425 
426       ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
427 #if defined PETSC_GAMG_USE_LOG
428       ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
429 #endif
430       ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
431       ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
432       ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
433       ierr = ISDestroy(&findices);CHKERRQ(ierr);
434 #if defined(PETSC_HAVE_CUDA)
435       ierr = MatAIJCUSPARSESetGenerateTranspose(Pnew,PETSC_TRUE);CHKERRQ(ierr);
436 #endif
437 
438 #if defined PETSC_GAMG_USE_LOG
439       ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
440 #endif
441       ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);
442 
443       /* output - repartitioned */
444       *a_P_inout = Pnew;
445     }
446     ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);
447 
448     *a_nactive_proc = new_size; /* output */
449 
450     /* pinning on reduced grids, not a bad heuristic and optimization gets folded into process reduction optimization */
451     if (pc_gamg->cpu_pin_coarse_grids) {
452 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
453       static PetscInt llev = 2;
454       ierr = PetscInfo1(pc,"Pinning level %D to the CPU\n",llev++);CHKERRQ(ierr);
455 #endif
456       ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
457       ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
458       if (1) { /* lvec is created, need to pin it, this is done in MatSetUpMultiply_MPIAIJ. Hack */
459         Mat         A = *a_Amat_crs, P = *a_P_inout;
460         PetscMPIInt size;
461         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
462         if (size > 1) {
463           Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
464           ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
465           ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
466         }
467       }
468     }
469 #if defined PETSC_GAMG_USE_LOG
470     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
471 #endif
472   }
473   PetscFunctionReturn(0);
474 }
475 
476 PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
477 {
478   PetscErrorCode ierr;
479   const char     *prefix;
480   char           addp[32];
481   PC_MG          *mg      = (PC_MG*)a_pc->data;
482   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
483 
484   PetscFunctionBegin;
485   ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
486   ierr = PetscInfo1(a_pc,"Square Graph on level %D\n",pc_gamg->current_level+1);CHKERRQ(ierr);
487   ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
488   ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
489   ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",pc_gamg->current_level);CHKERRQ(ierr);
490   ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
491   if ((*Gmat2)->structurally_symmetric) {
492     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AB);CHKERRQ(ierr);
493   } else {
494 #if defined(PETSC_HAVE_CUDA)
495     ierr = MatAIJCUSPARSESetGenerateTranspose(Gmat1,PETSC_TRUE);CHKERRQ(ierr);
496 #endif
497     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
498   }
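  /* Note on the choice above: A*A preserves an already structurally symmetric sparsity pattern,
     while for a non-symmetric pattern A^T*A is used instead so that the squared graph's sparsity
     comes out symmetric. */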
499   ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
500   ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
501   ierr = MatProductClear(*Gmat2);CHKERRQ(ierr);
502   /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
503   (*Gmat2)->assembled = PETSC_TRUE;
504   PetscFunctionReturn(0);
505 }
506 
507 /* -------------------------------------------------------------------------- */
508 /*
509    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
510                     by setting data structures and options.
511 
512    Input Parameter:
513 .  pc - the preconditioner context
514 
515 */
516 PetscErrorCode PCSetUp_GAMG(PC pc)
517 {
518   PetscErrorCode ierr;
519   PC_MG          *mg      = (PC_MG*)pc->data;
520   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
521   Mat            Pmat     = pc->pmat;
522   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
523   MPI_Comm       comm;
524   PetscMPIInt    rank,size,nactivepe;
525   Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
526   IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
527   PetscLogDouble nnz0=0.,nnztot=0.;
528   MatInfo        info;
529   PetscBool      is_last = PETSC_FALSE;
530 
531   PetscFunctionBegin;
532   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
533   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
534   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
535 
536   if (pc->setupcalled) {
537     if (!pc_gamg->reuse_prol || pc->flag == DIFFERENT_NONZERO_PATTERN) {
538       /* reset everything */
539       ierr = PCReset_MG(pc);CHKERRQ(ierr);
540       pc->setupcalled = 0;
541     } else {
542       PC_MG_Levels **mglevels = mg->levels;
543       /* just do Galerkin grids */
544       Mat          B,dA,dB;
545 
546       if (pc_gamg->Nlevels > 1) {
547         /* currently only handle case where mat and pmat are the same on coarser levels */
548         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
549         /* (re)set to get dirty flag */
550         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
551 
552         for (level=pc_gamg->Nlevels-2; level>=0; level--) {
553           MatReuse reuse = MAT_INITIAL_MATRIX;
554 
555           /* matrix structure can change from repartitioning or process reduction, but we do not know here whether process reduction happened. Should fix */
556           ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
557           if (B->product) {
558             if (B->product->A == dB && B->product->B == mglevels[level+1]->interpolate) {
559               reuse = MAT_REUSE_MATRIX;
560             }
561           }
562           if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr); }
563           if (reuse == MAT_REUSE_MATRIX) {
564             ierr = PetscInfo1(pc,"RAP after first solve, reuse matrix level %D\n",level);CHKERRQ(ierr);
565           } else {
566             ierr = PetscInfo1(pc,"RAP after first solve, new matrix level %D\n",level);CHKERRQ(ierr);
567           }
568           ierr = MatPtAP(dB,mglevels[level+1]->interpolate,reuse,PETSC_DEFAULT,&B);CHKERRQ(ierr);
569           mglevels[level]->A = B;
570           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
571           dB   = B;
572         }
573       }
574 
575       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
576       PetscFunctionReturn(0);
577     }
578   }
579 
580   if (!pc_gamg->data) {
581     if (pc_gamg->orig_data) {
582       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
583       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
584 
585       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
586       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
587       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
588 
589       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
590       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
591     } else {
592       if (!pc_gamg->ops->createdefaultdata) SETERRQ(comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
593       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
594     }
595   }
596 
597   /* cache original data for reuse */
598   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
599     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
600     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
601     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
602     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
603   }
604 
605   /* get basic dims */
606   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
607   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
608 
609   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
610   nnz0   = info.nz_used;
611   nnztot = info.nz_used;
612   ierr = PetscInfo6(pc,"level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
613 
614   /* Get A_i and R_i */
615   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
616     pc_gamg->current_level = level;
617     if (level >= PETSC_MG_MAXLEVELS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
618     level1 = level + 1;
619 #if defined PETSC_GAMG_USE_LOG
620     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
621 #if (defined GAMG_STAGES)
622     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
623 #endif
624 #endif
625     { /* construct prolongator */
626       Mat              Gmat;
627       PetscCoarsenData *agg_lists;
628       Mat              Prol11;
629 
630       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
631       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
632       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
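      /* These three hooks form the coarsening pipeline: graph() builds and filters the strength
         graph from A_k, coarsen() aggregates it (e.g. MIS-based aggregation for the "agg" type),
         and prolongator() builds the tentative prolongator from the aggregates; optprolongator()
         below, if set, then smooths it (smoothed aggregation). */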
633 
634       /* could have failed to create new level */
635       if (Prol11) {
636         const char *prefix;
637         char       addp[32];
638 
639         /* get new block size of coarse matrices */
640         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
641 
642         if (pc_gamg->ops->optprolongator) {
643           /* smooth */
644           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
645         }
646 
647         if (pc_gamg->use_aggs_in_asm) {
648           PetscInt bs;
649           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
650           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
651         }
652 
653         ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
654         ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
655         ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
656         ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
657         /* Always generate the transpose with CUDA
658            Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
659 #if defined(PETSC_HAVE_CUDA)
660         ierr = MatAIJCUSPARSESetGenerateTranspose(Prol11,PETSC_TRUE);CHKERRQ(ierr);
661 #endif
662         ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
663         Parr[level1] = Prol11;
664       } else Parr[level1] = NULL; /* failed to coarsen */
665 
666       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
667       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
668     } /* construct prolongator scope */
669 #if defined PETSC_GAMG_USE_LOG
670     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
671 #endif
672     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
673     if (!Parr[level1]) { /* failed to coarsen */
674       ierr =  PetscInfo1(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
675 #if defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES
676       ierr = PetscLogStagePop();CHKERRQ(ierr);
677 #endif
678       break;
679     }
680 #if defined PETSC_GAMG_USE_LOG
681     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
682 #endif
683     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is the next M, a loop test variable */
684     if (is_last) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_last set on a previous level");
685     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
686     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
687     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
688 
689 #if defined PETSC_GAMG_USE_LOG
690     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
691 #endif
692     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is a loop test variable */
693     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
694     nnztot += info.nz_used;
695     ierr = PetscInfo5(pc,"%D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
696 
697 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
698     ierr = PetscLogStagePop();CHKERRQ(ierr);
699 #endif
700     /* stop if one node or one proc -- could pull back for singular problems */
701     if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
702       ierr =  PetscInfo2(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
703       level++;
704       break;
705     }
706   } /* levels */
707   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
708 
709   ierr = PetscInfo2(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
710   pc_gamg->Nlevels = level + 1;
711   fine_level       = level;
712   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
713 
714   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
715     PetscErrorCode (*savesetfromoptions[PETSC_MG_MAXLEVELS])(PetscOptionItems*,KSP);
716 
717     /* set default smoothers & set operators */
718     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
719       KSP smoother;
720       PC  subpc;
721 
722       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
723       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
724 
725       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
726       /* set ops */
727       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
728       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
729 
730       /* set defaults */
731       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
732 
733       /* set blocks for ASM smoother that uses the 'aggregates' */
734       if (pc_gamg->use_aggs_in_asm) {
735         PetscInt sz;
736         IS       *iss;
737 
738         sz   = nASMBlocksArr[level];
739         iss  = ASMLocalIDsArr[level];
740         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
741         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
742         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
743         if (!sz) {
744           IS       is;
745           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
746           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
747           ierr = ISDestroy(&is);CHKERRQ(ierr);
748         } else {
749           PetscInt kk;
750           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
751           for (kk=0; kk<sz; kk++) {
752             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
753           }
754           ierr = PetscFree(iss);CHKERRQ(ierr);
755         }
756         ASMLocalIDsArr[level] = NULL;
757         nASMBlocksArr[level]  = 0;
758       } else {
759         ierr = PCSetType(subpc, PCSOR);CHKERRQ(ierr);
760       }
761     }
762     {
763       /* coarse grid */
764       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
765       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
766 
767       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
768       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
769       if (!pc_gamg->use_parallel_coarse_grid_solver) {
770         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
771         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
772         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
773         ierr = PCSetUp(subpc);CHKERRQ(ierr);
774         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
775         if (ii != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
776         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
777         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
778         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
779         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
780         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
781         /* This flag gets reset by PCBJacobiGetSubKSP(), but our BJacobi really does the same algorithm everywhere (and in
782          * fact, all but one process will have zero dofs), so we reset the flag to avoid having PCView_BJacobi attempt to
783          * view every subdomain as though they were different. */
784         ((PC_BJacobi*)subpc->data)->same_local_solves = PETSC_TRUE;
785       }
786     }
787 
788     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
789     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
790     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
791     ierr = PetscOptionsEnd();CHKERRQ(ierr);
792     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
793 
794     /* setup cheby eigen estimates from SA */
795     if (pc_gamg->use_sa_esteig==1) {
796       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
797         KSP       smoother;
798         PetscBool ischeb;
799 
800         savesetfromoptions[level] = NULL;
801         ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
802         ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
803         if (ischeb) {
804           KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;
805 
806           ierr = KSPSetFromOptions(smoother);CHKERRQ(ierr); /* let command line emax override using SA's eigenvalues */
807           if (mg->max_eigen_DinvA[level] > 0 && cheb->emax == 0.) {
808             PC        subpc;
809             PetscBool isjac;
810             ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
811             ierr = PetscObjectTypeCompare((PetscObject)subpc,PCJACOBI,&isjac);CHKERRQ(ierr);
812             if (isjac && pc_gamg->use_sa_esteig==1) {
813               PetscReal emax,emin;
814 
815               emin = mg->min_eigen_DinvA[level];
816               emax = mg->max_eigen_DinvA[level];
817               ierr = PetscInfo4(pc,"PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
818               cheb->emin_computed = emin;
819               cheb->emax_computed = emax;
820               ierr = KSPChebyshevSetEigenvalues(smoother, cheb->tform[2]*emin + cheb->tform[3]*emax, cheb->tform[0]*emin + cheb->tform[1]*emax);CHKERRQ(ierr);
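              /* e.g., with the default Chebyshev estimate transform tform = {0,0.1,0,1.1} this
                 targets the interval [0.1*emax, 1.1*emax], the usual smoothing range (assumes the
                 PETSc defaults; -ksp_chebyshev_esteig can change them) */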
821 
822               /* We have set the eigenvalues and consumed the transformation values, so
823                  prevent flagging the recomputation of the eigenvalues again in PCSetUp_MG
824                  below when setfromoptions is called again */
825               savesetfromoptions[level] = smoother->ops->setfromoptions;
826               smoother->ops->setfromoptions = NULL;
827             }
828           }
829         }
830       }
831     }
832 
833     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
834 
835     /* restore Chebyshev smoother for next calls */
836     if (pc_gamg->use_sa_esteig==1) {
837       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
838         if (savesetfromoptions[level]) {
839           KSP smoother;
840           ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
841           smoother->ops->setfromoptions = savesetfromoptions[level];
842         }
843       }
844     }
845 
846     /* clean up */
847     for (level=1; level<pc_gamg->Nlevels; level++) {
848       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
849       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
850     }
851   } else {
852     KSP smoother;
853 
854     ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
855     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
856     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
857     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
858     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 /* ------------------------------------------------------------------------- */
864 /*
865  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
866    that was created with PCCreate_GAMG().
867 
868    Input Parameter:
869 .  pc - the preconditioner context
870 
871    Application Interface Routine: PCDestroy()
872 */
873 PetscErrorCode PCDestroy_GAMG(PC pc)
874 {
875   PetscErrorCode ierr;
876   PC_MG          *mg     = (PC_MG*)pc->data;
877   PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;
878 
879   PetscFunctionBegin;
880   ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
881   if (pc_gamg->ops->destroy) {
882     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
883   }
884   ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
885   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
886   ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
887   ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
888   PetscFunctionReturn(0);
889 }
890 
891 /*@
892    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
893 
894    Logically Collective on PC
895 
896    Input Parameters:
897 +  pc - the preconditioner context
898 -  n - the number of equations
899 
900 
901    Options Database Key:
902 .  -pc_gamg_process_eq_limit <limit>
903 
904    Notes:
905     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
906           that has degrees of freedom
907 
908    Level: intermediate
909 
910 .seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
911 @*/
912 PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
913 {
914   PetscErrorCode ierr;
915 
916   PetscFunctionBegin;
917   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
918   ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
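/* Usage sketch (illustrative, not part of this file): aim for roughly 1000 equations per process
   on the reduced coarse grids:

     ierr = PCSetType(pc,PCGAMG);CHKERRQ(ierr);
     ierr = PCGAMGSetProcEqLim(pc,1000);CHKERRQ(ierr);

   options database equivalent: -pc_gamg_process_eq_limit 1000 */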
921 
922 static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
923 {
924   PC_MG   *mg      = (PC_MG*)pc->data;
925   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
926 
927   PetscFunctionBegin;
928   if (n>0) pc_gamg->min_eq_proc = n;
929   PetscFunctionReturn(0);
930 }
931 
932 /*@
933    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
934 
935  Collective on PC
936 
937    Input Parameters:
938 +  pc - the preconditioner context
939 -  n - maximum number of equations to aim for
940 
941    Options Database Key:
942 .  -pc_gamg_coarse_eq_limit <limit>
943 
944    Notes:
945     For example, -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid has fewer than 1000 unknowns.
946 
947    Level: intermediate
948 
949 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
950 @*/
951 PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
952 {
953   PetscErrorCode ierr;
954 
955   PetscFunctionBegin;
956   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
957   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
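/* Usage sketch (illustrative): stop coarsening once the coarsest grid would have fewer than 1000
   equations, matching the -pc_gamg_coarse_eq_limit 1000 example in the Notes above:

     ierr = PCGAMGSetCoarseEqLim(pc,1000);CHKERRQ(ierr);
*/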
960 
961 static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
962 {
963   PC_MG   *mg      = (PC_MG*)pc->data;
964   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
965 
966   PetscFunctionBegin;
967   if (n>0) pc_gamg->coarse_eq_limit = n;
968   PetscFunctionReturn(0);
969 }
970 
971 /*@
972    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
973 
974    Collective on PC
975 
976    Input Parameters:
977 +  pc - the preconditioner context
978 -  n - PETSC_TRUE or PETSC_FALSE
979 
980    Options Database Key:
981 .  -pc_gamg_repartition <true,false>
982 
983    Notes:
984     this will generally improve the load balancing of the work on each level
985 
986    Level: intermediate
987 
988 .seealso: ()
989 @*/
990 PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
991 {
992   PetscErrorCode ierr;
993 
994   PetscFunctionBegin;
995   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
996   ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
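/* Usage sketch (illustrative): enable repartitioning of the coarse grids; the MatPartitioning
   object created in PCGAMGCreateLevel_GAMG above picks up pc_gamg_ prefixed options:

     ierr = PCGAMGSetRepartition(pc,PETSC_TRUE);CHKERRQ(ierr);

   options database equivalent: -pc_gamg_repartition true -pc_gamg_mat_partitioning_type parmetis */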
999 
1000 static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
1001 {
1002   PC_MG   *mg      = (PC_MG*)pc->data;
1003   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1004 
1005   PetscFunctionBegin;
1006   pc_gamg->repart = n;
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 /*@
1011    PCGAMGSetEstEigKSPMaxIt - Set number of KSP iterations in eigen estimator (for Cheby)
1012 
1013    Collective on PC
1014 
1015    Input Parameters:
1016 +  pc - the preconditioner context
1017 -  n - number of iterations
1018 
1019    Options Database Key:
1020 .  -pc_gamg_esteig_ksp_max_it <its>
1021 
1022    Notes:
1023 
1024    Level: intermediate
1025 
1026 .seealso: ()
1027 @*/
1028 PetscErrorCode PCGAMGSetEstEigKSPMaxIt(PC pc, PetscInt n)
1029 {
1030   PetscErrorCode ierr;
1031 
1032   PetscFunctionBegin;
1033   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1034   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPMaxIt_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 static PetscErrorCode PCGAMGSetEstEigKSPMaxIt_GAMG(PC pc, PetscInt n)
1039 {
1040   PC_MG   *mg      = (PC_MG*)pc->data;
1041   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1042 
1043   PetscFunctionBegin;
1044   pc_gamg->esteig_max_it = n;
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 /*@
1049    PCGAMGSetUseSAEstEig - Use eigen estimate from smoothed aggregation for Cheby smoother
1050 
1051    Collective on PC
1052 
1053    Input Parameters:
1054 +  pc - the preconditioner context
1055 -  n - PETSC_TRUE or PETSC_FALSE
1056 
1057    Options Database Key:
1058 .  -pc_gamg_use_sa_esteig <true,false>
1059 
1060    Notes:
1061 
1062    Level: intermediate
1063 
1064 .seealso: ()
1065 @*/
1066 PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
1067 {
1068   PetscErrorCode ierr;
1069 
1070   PetscFunctionBegin;
1071   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1072   ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
1077 {
1078   PC_MG   *mg      = (PC_MG*)pc->data;
1079   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1080 
1081   PetscFunctionBegin;
1082   pc_gamg->use_sa_esteig = n ? 1 : 0;
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 /*@C
1087    PCGAMGSetEstEigKSPType - Set type of KSP in eigen estimator (for Cheby)
1088 
1089    Collective on PC
1090 
1091    Input Parameters:
1092 +  pc - the preconditioner context
1093 -  t - KSP type
1094 
1095    Options Database Key:
1096 .  -pc_gamg_esteig_ksp_type <type>
1097 
1098    Notes:
1099 
1100    Level: intermediate
1101 
1102 .seealso: ()
1103 @*/
1104 PetscErrorCode PCGAMGSetEstEigKSPType(PC pc, char t[])
1105 {
1106   PetscErrorCode ierr;
1107 
1108   PetscFunctionBegin;
1109   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1110   ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPType_C",(PC,char[]),(pc,t));CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 static PetscErrorCode PCGAMGSetEstEigKSPType_GAMG(PC pc, char t[])
1115 {
1116   PetscErrorCode ierr;
1117   PC_MG   *mg      = (PC_MG*)pc->data;
1118   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1119 
1120   PetscFunctionBegin;
1121   ierr = PetscStrcpy(pc_gamg->esteig_type,t);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 /*@
1126    PCGAMGSetEigenvalues - Set the low and high eigenvalue estimates used by GAMG instead of computing them
1127 
1128    Collective on PC
1129 
1130    Input Parameters:
1131 +  pc - the preconditioner context
1132 .  emax - max eigenvalue
1133 -  emin - min eigenvalue
1134 
1135    Options Database Key:
1136 .  -pc_gamg_eigenvalues
1137 
1138    Level: intermediate
1139 
1140 .seealso: PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPType()
1141 @*/
1142 PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
1143 {
1144   PetscErrorCode ierr;
1145 
1146   PetscFunctionBegin;
1147   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1148   ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
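/* Usage sketch (illustrative values): supply precomputed extreme eigenvalue estimates so GAMG
   need not compute them; both values must have the same sign and emax > emin:

     ierr = PCGAMGSetEigenvalues(pc,2.0,0.05);CHKERRQ(ierr);   emax = 2.0, emin = 0.05
*/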
1151 static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
1152 {
1153   PC_MG          *mg      = (PC_MG*)pc->data;
1154   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1155 
1156   PetscFunctionBegin;
1157   if (emax <= emin) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
1158   if (emax*emin <= 0.0) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
1159   pc_gamg->emax = emax;
1160   pc_gamg->emin = emin;
1161 
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 /*@
1166    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
1167 
1168    Collective on PC
1169 
1170    Input Parameters:
1171 +  pc - the preconditioner context
1172 -  n - PETSC_TRUE or PETSC_FALSE
1173 
1174    Options Database Key:
1175 .  -pc_gamg_reuse_interpolation <true,false>
1176 
1177    Level: intermediate
1178 
1179    Notes:
1180     this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but it allows
1181           the preconditioner to be rebuilt more quickly.
1182 
1183 .seealso: ()
1184 @*/
1185 PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
1186 {
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1191   ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
1196 {
1197   PC_MG   *mg      = (PC_MG*)pc->data;
1198   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1199 
1200   PetscFunctionBegin;
1201   pc_gamg->reuse_prol = n;
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 /*@
1206    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
1207 
1208    Collective on PC
1209 
1210    Input Parameters:
1211 +  pc - the preconditioner context
1212 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
1213 
1214    Options Database Key:
1215 .  -pc_gamg_asm_use_agg
1216 
1217    Level: intermediate
1218 
1219 .seealso: ()
1220 @*/
1221 PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
1222 {
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1227   ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
1232 {
1233   PC_MG   *mg      = (PC_MG*)pc->data;
1234   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1235 
1236   PetscFunctionBegin;
1237   pc_gamg->use_aggs_in_asm = flg;
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 /*@
1242    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
1243 
1244    Collective on PC
1245 
1246    Input Parameters:
1247 +  pc - the preconditioner context
1248 -  flg - PETSC_TRUE to not force coarse grid onto one processor
1249 
1250    Options Database Key:
1251 .  -pc_gamg_use_parallel_coarse_grid_solver
1252 
1253    Level: intermediate
1254 
1255 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
1256 @*/
1257 PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
1258 {
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1263   ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
1268 {
1269   PC_MG   *mg      = (PC_MG*)pc->data;
1270   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1271 
1272   PetscFunctionBegin;
1273   pc_gamg->use_parallel_coarse_grid_solver = flg;
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 /*@
1278    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
1279 
1280    Collective on PC
1281 
1282    Input Parameters:
1283 +  pc - the preconditioner context
1284 -  flg - PETSC_TRUE to pin coarse grids to CPU
1285 
1286    Options Database Key:
1287 .  -pc_gamg_cpu_pin_coarse_grids
1288 
1289    Level: intermediate
1290 
1291 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
1292 @*/
1293 PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
1294 {
1295   PetscErrorCode ierr;
1296 
1297   PetscFunctionBegin;
1298   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1299   ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
1304 {
1305   PC_MG   *mg      = (PC_MG*)pc->data;
1306   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1307 
1308   PetscFunctionBegin;
1309   pc_gamg->cpu_pin_coarse_grids = flg;
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 /*@
1314    PCGAMGSetCoarseGridLayoutType - place reduced grids on processes in natural order (compact type)
1315 
1316    Collective on PC
1317 
1318    Input Parameters:
1319 +  pc - the preconditioner context
1320 -  flg - Layout type
1321 
1322    Options Database Key:
1323 .  -pc_gamg_coarse_grid_layout_type
1324 
1325    Level: intermediate
1326 
1327 .seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
1328 @*/
1329 PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
1330 {
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1335   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
1340 {
1341   PC_MG   *mg      = (PC_MG*)pc->data;
1342   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1343 
1344   PetscFunctionBegin;
1345   pc_gamg->layout_type = flg;
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 /*@
1350    PCGAMGSetNlevels - Sets the maximum number of levels PCGAMG will use
1351 
1352    Not collective on PC
1353 
1354    Input Parameters:
1355 +  pc - the preconditioner
1356 -  n - the maximum number of levels to use
1357 
1358    Options Database Key:
1359 .  -pc_mg_levels <n> - maximum number of levels to use
1360 
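   Notes:
   A minimal usage sketch (error checking omitted):
.vb
   PCGAMGSetNlevels(pc, 4);  /* use at most 4 multigrid levels */
.ve
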
1361    Level: intermediate
1362 
1363 .seealso: PCMGSetLevels(), PCGAMGSetThreshold()
1364 @*/
1365 PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
1366 {
1367   PetscErrorCode ierr;
1368 
1369   PetscFunctionBegin;
1370   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1371   ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1376 {
1377   PC_MG   *mg      = (PC_MG*)pc->data;
1378   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1379 
1380   PetscFunctionBegin;
1381   pc_gamg->Nlevels = n;
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 /*@
1386    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1387 
1388    Not collective on PC
1389 
1390    Input Parameters:
1391 +  pc - the preconditioner context
1392 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1393 -  n - number of threshold values provided in array
1394 
1395    Options Database Key:
1396 .  -pc_gamg_threshold <threshold>
1397 
1398    Notes:
1399     Increasing the threshold decreases the rate of coarsening. Conversely, reducing the threshold increases the rate of coarsening (aggressive coarsening), which reduces the complexity of the coarse grids but generally results in slower solver convergence rates. Reducing coarse grid complexity also reduces the cost of the Galerkin coarse grid construction considerably.
1400     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, thereby reducing the coupling in the graph and producing a different (perhaps better) coarse set of points.
1401 
1402     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1403     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1404     If n is greater than the total number of levels, the excess entries in threshold will not be used.
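
    For example, this sketch (error checking omitted; the values are illustrative) sets drop tolerances on the two finest levels and lets the threshold scale fill in the remaining levels:
.vb
    PetscReal th[2] = {0.01, 0.005};
    PCGAMGSetThresholdScale(pc, 0.5);  /* must be called before PCGAMGSetThreshold() */
    PCGAMGSetThreshold(pc, th, 2);
.ve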
1405 
1406    Level: intermediate
1407 
1408 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1409 @*/
1410 PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
1411 {
1412   PetscErrorCode ierr;
1413 
1414   PetscFunctionBegin;
1415   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1416   if (n) PetscValidRealPointer(v,2);
1417   ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1422 {
1423   PC_MG   *mg      = (PC_MG*)pc->data;
1424   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1425   PetscInt i;
1426   PetscFunctionBegin;
1427   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1428   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = i ? pc_gamg->threshold[i-1]*pc_gamg->threshold_scale : 0.; /* guard against reading threshold[-1] when n is zero */
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 /*@
1433    PCGAMGSetRankReductionFactors - Set a manual schedule for process reduction on coarse grids
1434 
1435    Collective on PC
1436 
1437    Input Parameters:
1438 +  pc - the preconditioner context
1439 .  v - array of reduction factors; a value of 0 for the first entry forces a reduction to one process/device on the first level when using CUDA
1440 -  n - number of values provided in array
1441 
1442    Options Database Key:
1443 .  -pc_gamg_rank_reduction_factors <factors>
1444 
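   Notes:
   A usage sketch (error checking omitted; the factors are illustrative and, presumably, each divides the active process count on the corresponding coarse level):
.vb
   PetscInt f[2] = {4, 2};  /* 4x process reduction on the first coarse level, 2x on the next */
   PCGAMGSetRankReductionFactors(pc, f, 2);
.ve
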
1445    Level: intermediate
1446 
1447 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1448 @*/
1449 PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
1450 {
1451   PetscErrorCode ierr;
1452 
1453   PetscFunctionBegin;
1454   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1455   if (n) PetscValidIntPointer(v,2);
1456   ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
1461 {
1462   PC_MG   *mg      = (PC_MG*)pc->data;
1463   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1464   PetscInt i;
1465   PetscFunctionBegin;
1466   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
1467   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* -1 means not set; 0 would force one process/device on the first level */
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 /*@
1472    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1473 
1474    Not collective on PC
1475 
1476    Input Parameters:
1477 +  pc - the preconditioner context
1478 -  v - the threshold reduction factor applied at each level, usually < 1.0
1479 
1480    Options Database Key:
1481 .  -pc_gamg_threshold_scale <v>
1482 
1483    Notes:
1484    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1485    This scaling is applied to each subsequent coarsening; PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
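
   For example, with a scale of 0.5 and a single threshold of 0.01 given to PCGAMGSetThreshold(), successive levels use thresholds 0.01, 0.005, 0.0025, and so on, each coarser level multiplying the previous value by the scale.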
1486 
1487    Level: advanced
1488 
1489 .seealso: PCGAMGSetThreshold()
1490 @*/
1491 PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
1492 {
1493   PetscErrorCode ierr;
1494 
1495   PetscFunctionBegin;
1496   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1497   ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1502 {
1503   PC_MG   *mg      = (PC_MG*)pc->data;
1504   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1505   PetscFunctionBegin;
1506   pc_gamg->threshold_scale = v;
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 /*@C
1511    PCGAMGSetType - Set the type of algebraic multigrid method used by PCGAMG
1512 
1513    Collective on PC
1514 
1515    Input Parameters:
1516 +  pc - the preconditioner context
1517 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1518 
1519    Options Database Key:
1520 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1521 
1522    Level: intermediate
1523 
1524 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1525 @*/
1526 PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
1527 {
1528   PetscErrorCode ierr;
1529 
1530   PetscFunctionBegin;
1531   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1532   ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 /*@C
1537    PCGAMGGetType - Get the type of algebraic multigrid method used by PCGAMG
1538 
1539    Collective on PC
1540 
1541    Input Parameter:
1542 .  pc - the preconditioner context
1543 
1544    Output Parameter:
1545 .  type - the type of algorithm used
1546 
1547    Level: intermediate
1548 
1549 .seealso: PCGAMGSetType(), PCGAMGType
1550 @*/
1551 PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
1552 {
1553   PetscErrorCode ierr;
1554 
1555   PetscFunctionBegin;
1556   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1557   ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1562 {
1563   PC_MG          *mg      = (PC_MG*)pc->data;
1564   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1565 
1566   PetscFunctionBegin;
1567   *type = pc_gamg->type;
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
1572 {
1573   PetscErrorCode ierr,(*r)(PC);
1574   PC_MG          *mg      = (PC_MG*)pc->data;
1575   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1576 
1577   PetscFunctionBegin;
1578   pc_gamg->type = type;
1579   ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
1580   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
1581   if (pc_gamg->ops->destroy) {
1582     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
1583     ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
1584     pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1585     /* cleaning up common data in pc_gamg - this should disappear someday */
1586     pc_gamg->data_cell_cols = 0;
1587     pc_gamg->data_cell_rows = 0;
1588     pc_gamg->orig_data_cell_cols = 0;
1589     pc_gamg->orig_data_cell_rows = 0;
1590     ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
1591     pc_gamg->data_sz = 0;
1592   }
1593   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
1594   ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
1595   ierr = (*r)(pc);CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 /* -------------------------------------------------------------------------- */
1600 /*
1601    PCMGGetGridComplexity - compute coarse grid complexity of MG hierarchy
1602 
1603    Input Parameter:
1604 .  pc - the preconditioner context
1605 
1606    Output Parameter:
1607 .  gc - grid complexity = sum_i(nnz_i) / nnz_0
1608 
1609    Level: advanced
1610 */
1611 static PetscErrorCode PCMGGetGridComplexity(PC pc, PetscReal *gc)
1612 {
1613   PetscErrorCode ierr;
1614   PC_MG          *mg      = (PC_MG*)pc->data;
1615   PC_MG_Levels   **mglevels = mg->levels;
1616   PetscInt       lev;
1617   PetscLogDouble nnz0 = 0, sgc = 0;
1618   MatInfo        info;
1619 
1620   PetscFunctionBegin;
1621   if (!pc->setupcalled) {
1622     *gc = 0;
1623     PetscFunctionReturn(0);
1624   }
1625   if (!mg->nlevels) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MG has no levels");
1626   for (lev=0; lev<mg->nlevels; lev++) {
1627     Mat dB;
1628     ierr = KSPGetOperators(mglevels[lev]->smoothd,NULL,&dB);CHKERRQ(ierr);
1629     ierr = MatGetInfo(dB,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
1630     sgc += info.nz_used;
1631     if (lev==mg->nlevels-1) nnz0 = info.nz_used;
1632   }
1633   if (nnz0 > 0) *gc = (PetscReal)(sgc/nnz0);
1634   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number of nonzeros on the finest level is not available");
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1639 {
1640   PetscErrorCode ierr; PetscInt i;
1641   PC_MG          *mg      = (PC_MG*)pc->data;
1642   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1643   PetscReal       gc=0;
1644   PetscFunctionBegin;
1645   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1646   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1647   for (i=0;i<mg->nlevels; i++) {
1648     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1649   }
1650   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1651   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1652   if (pc_gamg->use_aggs_in_asm) {
1653     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1654   }
1655   if (pc_gamg->use_parallel_coarse_grid_solver) {
1656     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1657   }
1658 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1659   if (pc_gamg->cpu_pin_coarse_grids) {
1660     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1661   }
1662 #endif
1663   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1664   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1665   /* } else { */
1666   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1667   /* } */
1668   if (pc_gamg->ops->view) {
1669     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1670   }
1671   ierr = PCMGGetGridComplexity(pc,&gc);CHKERRQ(ierr);
1672   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g\n",(double)gc);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1677 {
1678   PetscErrorCode ierr;
1679   PC_MG          *mg      = (PC_MG*)pc->data;
1680   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1681   PetscBool      flag,f2;
1682   MPI_Comm       comm;
1683   char           prefix[256],tname[32];
1684   PetscInt       i,n;
1685   const char     *pcpre;
1686   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1687   PetscFunctionBegin;
1688   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1689   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1690   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1691   if (flag) {
1692     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1693   }
1694   ierr = PetscOptionsFList("-pc_gamg_esteig_ksp_type","Krylov method for eigen estimator","PCGAMGSetEstEigKSPType",KSPList,pc_gamg->esteig_type,tname,sizeof(tname),&flag);CHKERRQ(ierr);
1695   if (flag) {
1696     ierr = PCGAMGSetEstEigKSPType(pc,tname);CHKERRQ(ierr);
1697   }
1698   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1699   f2 = PETSC_TRUE;
1700   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use eigen estimate from Smoothed aggregation for smoother","PCGAMGSetUseSAEstEig",f2,&f2,&flag);CHKERRQ(ierr);
1701   if (flag) pc_gamg->use_sa_esteig = f2 ? 1 : 0;
1702   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1703   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1704   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1705   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1706   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1707   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1708   ierr = PetscOptionsInt("-pc_gamg_esteig_ksp_max_it","Number of iterations of eigen estimator","PCGAMGSetEstEigKSPMaxIt",pc_gamg->esteig_max_it,&pc_gamg->esteig_max_it,NULL);CHKERRQ(ierr);
1709   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1710   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1711   n = PETSC_MG_MAXLEVELS;
1712   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1713   if (!flag || n < PETSC_MG_MAXLEVELS) {
1714     if (!flag) n = 1;
1715     i = n;
1716     do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_MG_MAXLEVELS);
1717   }
1718   n = PETSC_MG_MAXLEVELS;
1719   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (0 for first reduction puts one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1720   if (!flag) i = 0;
1721   else i = n;
1722   while (i < PETSC_MG_MAXLEVELS) pc_gamg->level_reduction_factors[i++] = -1; /* fill the rest with -1 (not set); a do-while would write past the array when i == PETSC_MG_MAXLEVELS */
1723   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1724   {
1725     PetscReal eminmax[2] = {0., 0.};
1726     n = 2;
1727     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1728     if (flag) {
1729       if (n != 2) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1730       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr);
1731     }
1732   }
1733   /* set options for subtype */
1734   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1735 
1736   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1737   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1738   ierr = PetscOptionsTail();CHKERRQ(ierr);
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 /* -------------------------------------------------------------------------- */
1743 /*MC
1744      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1745 
1746    Options Database Keys:
1747 +   -pc_gamg_type <type> - one of agg, geo, or classical
1748 .   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
1749 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
1750 .   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1751 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1752                                         equations on each process that has degrees of freedom
1753 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1754 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1755 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1756 
1757    Options Database Keys for default Aggregation:
1758 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1759 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1760 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1761 
1762    Multigrid options:
1763 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1764 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
1765 .  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kaskade
1766 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1767 
1768 
1769   Notes:
1770     In order to obtain good performance for PCGAMG for vector valued problems you must
1771        Call MatSetBlockSize() to indicate the number of degrees of freedom per grid point
1772        Call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator
1773        See the Users Manual Chapter 4 for more details
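
   A typical command line invocation (a sketch; the option values are only illustrative):
.vb
   ./app -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_threshold 0.01 -pc_mg_levels 4
.ve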
1774 
1775   Level: intermediate
1776 
1777 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1778            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetEstEigKSPType()
1779 M*/
1780 
1781 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1782 {
1783   PetscErrorCode ierr; PetscInt i;
1784   PC_GAMG        *pc_gamg;
1785   PC_MG          *mg;
1786 
1787   PetscFunctionBegin;
1788    /* register AMG type */
1789   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1790 
1791   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1792   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1793   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1794 
1795   /* create a supporting struct and attach it to pc */
1796   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1797   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1798   mg           = (PC_MG*)pc->data;
1799   mg->innerctx = pc_gamg;
1800 
1801   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1802 
1803   /* these should be in subctx but repartitioning needs simple arrays */
1804   pc_gamg->data_sz = 0;
1805   pc_gamg->data    = NULL;
1806 
1807   /* override the PCMG function pointers with the PCGAMG versions */
1808   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1809   pc->ops->setup          = PCSetUp_GAMG;
1810   pc->ops->reset          = PCReset_GAMG;
1811   pc->ops->destroy        = PCDestroy_GAMG;
1812   mg->view                = PCView_GAMG;
1813 
1814   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1816   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1817   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1819   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPType_C",PCGAMGSetEstEigKSPType_GAMG);CHKERRQ(ierr);
1820   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPMaxIt_C",PCGAMGSetEstEigKSPMaxIt_GAMG);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1822   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1823   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1825   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1826   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1828   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1829   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1831   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1832   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1833   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1834   pc_gamg->repart           = PETSC_FALSE;
1835   pc_gamg->reuse_prol       = PETSC_FALSE;
1836   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1837   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1838   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1839   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1840   pc_gamg->min_eq_proc      = 50;
1841   pc_gamg->coarse_eq_limit  = 50;
1842   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1843   pc_gamg->threshold_scale = 1.;
1844   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1845   pc_gamg->current_level    = 0; /* don't need to init really */
1846   ierr = PetscStrcpy(pc_gamg->esteig_type,NULL);CHKERRQ(ierr);
1847   pc_gamg->esteig_max_it    = 10;
1848   pc_gamg->use_sa_esteig    = -1;
1849   pc_gamg->emin             = 0;
1850   pc_gamg->emax             = 0;
1851 
1852   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1853 
1854   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1855   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 /*@C
1860  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1861     from PCInitializePackage().
1862 
1863  Level: developer
1864 
1865  .seealso: PetscInitialize()
1866 @*/
1867 PetscErrorCode PCGAMGInitializePackage(void)
1868 {
1869   PetscErrorCode ierr;
1870 
1871   PetscFunctionBegin;
1872   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1873   PCGAMGPackageInitialized = PETSC_TRUE;
1874   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1875   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1876   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1877   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1878 
1879   /* general events */
1880   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", 0, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1881   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1882   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1883   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1884   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1885   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1886   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1887 
1888 #if defined PETSC_GAMG_USE_LOG
1889   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1890   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1891   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1892   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1893   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1894   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1895   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1896   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1897   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1898   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1899   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1900   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1901   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1902   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1903   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1904   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1905   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1906 
1907   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1908   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1909   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1910   /* create timer stages */
1911 #if defined GAMG_STAGES
1912   {
1913     char     str[32];
1914     PetscInt lidx;
1915     ierr = PetscSNPrintf(str,sizeof(str),"MG Level %d (finest)",0);CHKERRQ(ierr);
1916     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1917     for (lidx=1; lidx<PETSC_MG_MAXLEVELS; lidx++) { /* gamg_stages[] has PETSC_MG_MAXLEVELS entries */
1918       ierr = PetscSNPrintf(str,sizeof(str),"MG Level %d",(int)lidx);CHKERRQ(ierr);
1919       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1920     }
1921   }
1922 #endif
1923 #endif
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 /*@C
1928  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1929     called from PetscFinalize() automatically.
1930 
1931  Level: developer
1932 
1933  .seealso: PetscFinalize()
1934 @*/
1935 PetscErrorCode PCGAMGFinalizePackage(void)
1936 {
1937   PetscErrorCode ierr;
1938 
1939   PetscFunctionBegin;
1940   PCGAMGPackageInitialized = PETSC_FALSE;
1941   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 /*@C
1946  PCGAMGRegister - Register a PCGAMG implementation.
1947 
1948  Input Parameters:
1949  + type - string that will be used as the name of the GAMG type.
1950  - create - function for creating the gamg context.
1951 
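 Sample usage (a sketch; "my_gamg" and MyGAMGCreate are hypothetical user-supplied names):
.vb
   PCGAMGRegister("my_gamg", MyGAMGCreate);
.ve
 The new type can then be selected with -pc_gamg_type my_gamg.
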
1952   Level: advanced
1953 
1954  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1955 @*/
1956 PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
1957 {
1958   PetscErrorCode ierr;
1959 
1960   PetscFunctionBegin;
1961   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1962   ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
1963   PetscFunctionReturn(0);
1964 }
1965 
1966