xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision 9cdcbf30569e2d5919a4e1b2ebe0e3d599e8589b)
1 /*
2  GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /* Hack to access same_local_solves */
7 
8 #if defined PETSC_GAMG_USE_LOG
9 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
10 #endif
11 
12 #if defined PETSC_USE_LOG
13 PetscLogEvent PC_GAMGGraph_AGG;
14 PetscLogEvent PC_GAMGGraph_GEO;
15 PetscLogEvent PC_GAMGCoarsen_AGG;
16 PetscLogEvent PC_GAMGCoarsen_GEO;
17 PetscLogEvent PC_GAMGProlongator_AGG;
18 PetscLogEvent PC_GAMGProlongator_GEO;
19 PetscLogEvent PC_GAMGOptProlongator_AGG;
20 #endif
21 
22 /* #define GAMG_STAGES */
23 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
24 static PetscLogStage gamg_stages[PETSC_GAMG_MAXLEVELS];
25 #endif
26 
27 static PetscFunctionList GAMGList = 0;
28 static PetscBool PCGAMGPackageInitialized;
29 
30 /* ----------------------------------------------------------------------------- */
31 PetscErrorCode PCReset_GAMG(PC pc)
32 {
33   PetscErrorCode ierr;
34   PC_MG          *mg      = (PC_MG*)pc->data;
35   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
36 
37   PetscFunctionBegin;
38   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
39   pc_gamg->data_sz = 0;
40   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 /* -------------------------------------------------------------------------- */
45 /*
46    PCGAMGCreateLevel_GAMG: create the coarse operator with RAP; repartition and/or reduce the number
47      of active processes.
48 
49    Input Parameters:
50    . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
51           'pc_gamg->data_sz' are changed via repartitioning/reduction.
52    . Amat_fine - matrix on this fine (k) level
53    . cr_bs - coarse block size
   . is_last - this is the last (coarsest) level; reduce to one process unless a parallel coarse grid solver is requested
54    Input/Output Parameters:
55    . a_P_inout - prolongation operator to the next level (k-->k-1)
56    . a_nactive_proc - number of active procs
57    Output Parameters:
58    . a_Amat_crs - coarse matrix that is created (k-1)
   . Pcolumnperm - if requested (non-NULL), the column permutation applied to the new prolongator; NULL when no repartitioning/reduction is done
59 */
60 
61 static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
62 {
63   PetscErrorCode  ierr;
64   PC_MG           *mg         = (PC_MG*)pc->data;
65   PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
66   Mat             Cmat,Pold=*a_P_inout;
67   MPI_Comm        comm;
68   PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
69   PetscInt        ncrs_eq,ncrs,f_bs;
70 
71   PetscFunctionBegin;
72   ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
73   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
74   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
75   ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
76   ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
77 
78   if (Pcolumnperm) *Pcolumnperm = NULL;
79 
80   /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
81   ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
82   if (pc_gamg->data_cell_rows>0) {
83     ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
84   } else {
85     PetscInt  bs;
86     ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
87     ncrs = ncrs_eq/bs;
88   }
89   /* get number of PEs to make active 'new_size', reduce, can be any integer 1-P */
90   if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) new_size = 1;
91   else {
92     PetscInt ncrs_eq_glob;
93     ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
94     new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
95     if (!new_size) new_size = 1; /* not likely, possible? */
96     else if (new_size >= nactive) new_size = nactive; /* no change, rare */
97   }
98 
99   if (new_size==nactive) {
100     *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
101     if (new_size < size) {
102       /* odd case where multiple coarse grids are on one processor or no coarsening ... */
103       ierr = PetscInfo1(pc,"reduced grid using same number of processors (%D) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
104       if (pc_gamg->cpu_pin_coarse_grids) {
105         ierr = MatPinToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
106         ierr = MatPinToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
107       }
108     }
109     /* we know that the grid structure can be reused in MatPtAP */
110   } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
111     PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor;
112     IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
113     nloc_old = ncrs_eq/cr_bs;
114     if (ncrs_eq % cr_bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
115 #if defined PETSC_GAMG_USE_LOG
116     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
117 #endif
118     /* get new_size and rfactor */
119     if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
120       /* find the reduction factor: the divisor of 'size' whose quotient is closest to the target 'new_size' */
121       if (new_size == 1) rfactor = size; /* don't modify */
122       else {
123         PetscReal best_fact = 0.;
124         jj = -1;
125         for (kk = 1 ; kk <= size ; kk++) {
126           if (!(size%kk)) { /* a candidate */
127             PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
128             if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
129             if (fact > best_fact) {
130               best_fact = fact; jj = kk;
131             }
132           }
133         }
134         if (jj != -1) rfactor = jj;
135         else rfactor = 1; /* a prime */
136         if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
137         else expand_factor = rfactor;
138       }
139       new_size = size/rfactor; /* make the new size an exact divisor of 'size' */
140       if (new_size==nactive) {
141         *a_Amat_crs = Cmat; /* output - no repartitioning or reduction, bail out because nested here */
142         ierr = PetscInfo2(pc,"Finding factorable processor set stopped reduction: new_size=%D, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
143 #if defined PETSC_GAMG_USE_LOG
144         ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
145 #endif
146         PetscFunctionReturn(0);
147       }
148     }
149     /* make 'is_eq_newproc' */
150     ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
151     if (pc_gamg->repart) {
152       /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
153       Mat      adj;
154       ierr = PetscInfo4(pc,"Repartition: size (active): %D --> %D, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
155       /* get 'adj' */
156       if (cr_bs == 1) {
157         ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
158       } else {
159         /* make a scalar matrix to partition (no Stokes here) */
160         Mat               tMat;
161         PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
162         const PetscScalar *vals;
163         const PetscInt    *idx;
164         PetscInt          *d_nnz, *o_nnz, M, N;
165         static PetscInt   llev = 0;
166         MatType           mtype;
167 
168         ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
169         ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
170         ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
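        /* estimate block-row nonzero counts from the scalar rows of Cmat, clamped to the local (d_nnz) and off-process (o_nnz) block sizes */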
171         for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
172           ierr      = MatGetRow(Cmat,Ii,&ncols,0,0);CHKERRQ(ierr);
173           d_nnz[jj] = ncols/cr_bs;
174           o_nnz[jj] = ncols/cr_bs;
175           ierr      = MatRestoreRow(Cmat,Ii,&ncols,0,0);CHKERRQ(ierr);
176           if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
177           if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
178         }
179 
180         ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
181         ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
182         ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
183         ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
184         ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
185         ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
186         ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
187 
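        /* build the block-collapsed graph of Cmat: each nonzero contributes a unit value to its (block-row, block-col) entry of tMat */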
188         for (ii = Istart_crs; ii < Iend_crs; ii++) {
189           PetscInt dest_row = ii/cr_bs;
190           ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
191           for (jj = 0; jj < ncols; jj++) {
192             PetscInt    dest_col = idx[jj]/cr_bs;
193             PetscScalar v        = 1.0;
194             ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
195           }
196           ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
197         }
198         ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
199         ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
200 
201         if (llev++ == -1) {
202           PetscViewer viewer; char fname[32];
203           ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
204           PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);
205           ierr = MatView(tMat, viewer);CHKERRQ(ierr);
206           ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
207         }
208         ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
209         ierr = MatDestroy(&tMat);CHKERRQ(ierr);
210       } /* create 'adj' */
211 
212       { /* partition: get newproc_idx */
213         char            prefix[256];
214         const char      *pcpre;
215         const PetscInt  *is_idx;
216         MatPartitioning mpart;
217         IS              proc_is;
218 
219         ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
220         ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
221         ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
222         ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
223         ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
224         ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
225         ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
226         ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
227         ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
228 
229         /* collect IS info */
230         ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
231         ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
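        /* expand the per-node partition to equation space (cr_bs equations per node); expand_factor spreads the target ranks across the machine when the 'spread' layout is used */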
232         for (kk = jj = 0 ; kk < nloc_old ; kk++) {
233           for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
234             newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
235           }
236         }
237         ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
238         ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
239       }
240       ierr = MatDestroy(&adj);CHKERRQ(ierr);
241 
242       ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
243       ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
244     } else { /* simple aggregation of parts -- 'is_eq_newproc' */
245       PetscInt targetPE;
246       if (new_size==nactive) {
247         *a_Amat_crs = Cmat; /* output - no repartitioning or reduction, bail out because nested here */
248         ierr        = PetscFree(counts);CHKERRQ(ierr);
249         ierr = PetscInfo2(pc,"Aggregate processors noop: new_size=%D, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
250 #if defined PETSC_GAMG_USE_LOG
251         ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
252 #endif
253         PetscFunctionReturn(0);
254       }
255       ierr = PetscInfo1(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
256       targetPE = (rank/rfactor)*expand_factor;
257       ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
258     } /* end simple 'is_eq_newproc' */
259 
260     /*
261       Create an index set from the is_eq_newproc index set to indicate the new global numbering (the mapping TO the new layout)
262     */
263     ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
264     is_eq_num_prim = is_eq_num;
265     /*
266       Determine how many equations/vertices are assigned to each processor
267     */
268     ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
269     ncrs_eq_new = counts[rank];
270     ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
271     ncrs_new = ncrs_eq_new/cr_bs;
272 
273     ierr = PetscFree(counts);CHKERRQ(ierr);
274 #if defined PETSC_GAMG_USE_LOG
275     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
276 #endif
277     /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxiliary data into some complex abstracted thing */
278     {
279       Vec            src_crd, dest_crd;
280       const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
281       VecScatter     vecscat;
282       PetscScalar    *array;
283       IS isscat;
284       /* move data (for primal equations only) */
285       /* Create a vector to contain the newly ordered element information */
286       ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
287       ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
288       ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
289       /*
290 	There are 'ndata_rows*ndata_cols' data items per node (one can think of the vectors as having
291 	a block size of 'ndata_rows*ndata_cols').  Note, ISs are expanded into equation space by 'cr_bs'.
292       */
293       ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
294       ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
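      /* for each local node, recover its new global node number from the new equation numbering and list the destination indices of its node_data_sz data entries */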
295       for (ii=0,jj=0; ii<ncrs; ii++) {
296 	PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
297 	for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
298       }
299       ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
300       ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
301       ierr = PetscFree(tidx);CHKERRQ(ierr);
302       /*
303 	Create a vector to contain the original vertex information for each element
304       */
305       ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
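      /* pack pc_gamg->data (stored as ndata_cols columns of length ncrs*ndata_rows) into src_crd, ordered node by node */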
306       for (jj=0; jj<ndata_cols; jj++) {
307 	const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
308 	for (ii=0; ii<ncrs; ii++) {
309 	  for (kk=0; kk<ndata_rows; kk++) {
310 	    PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
311 	    PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
312 	    ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
313 	  }
314 	}
315       }
316       ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
317       ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
318       /*
319 	Scatter the element vertex information (still in the original vertex ordering)
320 	to the correct processor
321       */
322       ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
323       ierr = ISDestroy(&isscat);CHKERRQ(ierr);
324       ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
325       ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
326       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
327       ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
328       /*
329 	Put the element vertex data into a new allocation of the gdata->ele
330       */
331       ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
332       ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);
333 
334       pc_gamg->data_sz = node_data_sz*ncrs_new;
335       strideNew        = ncrs_new*ndata_rows;
336 
337       ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
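      /* unpack the scattered node data back into the column-major layout of pc_gamg->data, now with ncrs_new local nodes */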
338       for (jj=0; jj<ndata_cols; jj++) {
339 	for (ii=0; ii<ncrs_new; ii++) {
340 	  for (kk=0; kk<ndata_rows; kk++) {
341 	    PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
342 	    pc_gamg->data[ix] = PetscRealPart(array[jx]);
343 	  }
344 	}
345       }
346       ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
347       ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
348     }
349     /* move A and P (columns) with new layout */
350 #if defined PETSC_GAMG_USE_LOG
351     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
352 #endif
353     /*
354       Invert for MatCreateSubMatrix
355     */
356     ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
357     ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
358     ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
359     if (is_eq_num != is_eq_num_prim) {
360       ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
361     }
362     if (Pcolumnperm) {
363       ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
364       *Pcolumnperm = new_eq_indices;
365     }
366     ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
367 #if defined PETSC_GAMG_USE_LOG
368     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
369     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
370 #endif
371     /* 'a_Amat_crs' output */
372     {
373       Mat mat;
374       ierr        = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
375       *a_Amat_crs = mat;
376     }
377     ierr = MatDestroy(&Cmat);CHKERRQ(ierr);
378 
379 #if defined PETSC_GAMG_USE_LOG
380     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
381 #endif
382     /* prolongator */
383     {
384       IS       findices;
385       PetscInt Istart,Iend;
386       Mat      Pnew;
387 
388       ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
389 #if defined PETSC_GAMG_USE_LOG
390       ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
391 #endif
392       ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
393       ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
394       ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
395       ierr = ISDestroy(&findices);CHKERRQ(ierr);
396 
397 #if defined PETSC_GAMG_USE_LOG
398       ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
399 #endif
400       ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);
401 
402       /* output - repartitioned */
403       *a_P_inout = Pnew;
404     }
405     ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);
406 
407     *a_nactive_proc = new_size; /* output */
408 
409     /* pin reduced grids to the CPU: not a bad heuristic, and this optimization gets folded into the process reduction optimization */
410     if (pc_gamg->cpu_pin_coarse_grids) {
411 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
412       ierr = PetscInfo(pc,"Pinning level to the CPU\n");CHKERRQ(ierr);
413 #endif
414       ierr = MatPinToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
415       ierr = MatPinToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
416       if (1) { /* lvec is created, need to pin it, this is done in MatSetUpMultiply_MPIAIJ. Hack */
417         Mat         A = *a_Amat_crs, P = *a_P_inout;
418         PetscMPIInt size;
419         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
420         if (size > 1) {
421           Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
422           ierr = VecPinToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
423           ierr = VecPinToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
424         }
425       }
426     }
427   }
428   PetscFunctionReturn(0);
429 }
430 
431 /* -------------------------------------------------------------------------- */
432 /*
433    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
434                     by setting data structures and options.
435 
436    Input Parameter:
437 .  pc - the preconditioner context
438 
439 */
440 PetscErrorCode PCSetUp_GAMG(PC pc)
441 {
442   PetscErrorCode ierr;
443   PC_MG          *mg      = (PC_MG*)pc->data;
444   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
445   Mat            Pmat     = pc->pmat;
446   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_GAMG_MAXLEVELS];
447   MPI_Comm       comm;
448   PetscMPIInt    rank,size,nactivepe;
449   Mat            Aarr[PETSC_GAMG_MAXLEVELS],Parr[PETSC_GAMG_MAXLEVELS];
450   IS             *ASMLocalIDsArr[PETSC_GAMG_MAXLEVELS];
451   PetscLogDouble nnz0=0.,nnztot=0.;
452   MatInfo        info;
453   PetscBool      is_last = PETSC_FALSE;
454 
455   PetscFunctionBegin;
456   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
457   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
458   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
459 
460   if (pc_gamg->setup_count++ > 0) {
461     if ((PetscBool)(!pc_gamg->reuse_prol)) {
462       /* reset everything */
463       ierr = PCReset_MG(pc);CHKERRQ(ierr);
464       pc->setupcalled = 0;
465     } else {
466       PC_MG_Levels **mglevels = mg->levels;
467       /* just do Galerkin grids */
468       Mat          B,dA,dB;
469 
470       if (!pc->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCSetUp() has not been called yet");
471       if (pc_gamg->Nlevels > 1) {
472         /* currently only handle case where mat and pmat are the same on coarser levels */
473         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
474         /* (re)set to get dirty flag */
475         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
476 
477         for (level=pc_gamg->Nlevels-2; level>=0; level--) {
478           /* 2nd solve, matrix structure can change from repartitioning or process reduction but don't know if we have process reduction here. Should fix */
479           if (pc_gamg->setup_count==2 /* && pc_gamg->repart||reduction */) {
480             ierr = PetscInfo2(pc,"new RAP after first solve level %D, %D setup\n",level,pc_gamg->setup_count);CHKERRQ(ierr);
481             ierr = MatPtAP(dB,mglevels[level+1]->interpolate,MAT_INITIAL_MATRIX,2.0,&B);CHKERRQ(ierr);
482             ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr);
483             mglevels[level]->A = B;
484           } else {
485             ierr = PetscInfo2(pc,"RAP after first solve reusing matrix level %D, %D setup\n",level,pc_gamg->setup_count);CHKERRQ(ierr);
486             ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
487             ierr = MatPtAP(dB,mglevels[level+1]->interpolate,MAT_REUSE_MATRIX,1.0,&B);CHKERRQ(ierr);
488           }
489           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
490           dB   = B;
491         }
492       }
493 
494       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
495       PetscFunctionReturn(0);
496     }
497   }
498 
499   if (!pc_gamg->data) {
500     if (pc_gamg->orig_data) {
501       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
502       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
503 
504       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
505       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
506       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
507 
508       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
509       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
510     } else {
511       if (!pc_gamg->ops->createdefaultdata) SETERRQ(comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
512       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
513     }
514   }
515 
516   /* cache original data for reuse */
517   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
518     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
519     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
520     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
521     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
522   }
523 
524   /* get basic dims */
525   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
526   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
527 
528   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
529   nnz0   = info.nz_used;
530   nnztot = info.nz_used;
531   ierr = PetscInfo6(pc,"level %d) N=%D, n data rows=%d, n data cols=%d, nnz/row (ave)=%d, np=%d\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
532 
533   /* Get A_i and R_i */
534   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
535     pc_gamg->current_level = level;
536     level1 = level + 1;
537 #if defined PETSC_GAMG_USE_LOG
538     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
539 #if (defined GAMG_STAGES)
540     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
541 #endif
542 #endif
543     { /* construct prolongator */
544       Mat              Gmat;
545       PetscCoarsenData *agg_lists;
546       Mat              Prol11;
547 
548       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
549       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
550       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
551 
552       /* could have failed to create new level */
553       if (Prol11) {
554         /* get new block size of coarse matrices */
555         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
556 
557         if (pc_gamg->ops->optprolongator) {
558           /* smooth */
559           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
560         }
561 
562         if (pc_gamg->use_aggs_in_asm) {
563           PetscInt bs;
564           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
565           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
566         }
567 
568         Parr[level1] = Prol11;
569       } else Parr[level1] = NULL; /* failed to coarsen */
570 
571       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
572       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
573     } /* construct prolongator scope */
574 #if defined PETSC_GAMG_USE_LOG
575     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
576 #endif
577     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
578     if (!Parr[level1]) { /* failed to coarsen */
579       ierr =  PetscInfo1(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
580 #if defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES
581       ierr = PetscLogStagePop();CHKERRQ(ierr);
582 #endif
583       break;
584     }
585 #if defined PETSC_GAMG_USE_LOG
586     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
587 #endif
588     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is the next M, a loop test variable */
589     if (is_last) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_last should not be set yet");
590     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
591     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
592     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
593 
594 #if defined PETSC_GAMG_USE_LOG
595     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
596 #endif
597     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is a loop test variable */
598     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
599     nnztot += info.nz_used;
600     ierr = PetscInfo5(pc,"%d) N=%D, n data cols=%d, nnz/row (ave)=%d, %d active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
601 
602 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
603     ierr = PetscLogStagePop();CHKERRQ(ierr);
604 #endif
605     /* stop if one node or one proc -- could pull back for singular problems */
606     if ( (pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2) ) {
607       ierr =  PetscInfo2(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
608       level++;
609       break;
610     }
611   } /* levels */
612   ierr                  = PetscFree(pc_gamg->data);CHKERRQ(ierr);
613 
614   ierr = PetscInfo2(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
615   pc_gamg->Nlevels = level + 1;
616   fine_level       = level;
617   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
618 
619   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
620     /* set default smoothers & set operators */
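    /* note: lidx is the PCMG level index (0 = coarsest), while 'level' indexes Aarr/Parr (0 = finest) */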
621     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
622       KSP smoother;
623       PC  subpc;
624 
625       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
626       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
627 
628       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
629       /* set ops */
630       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
631       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
632 
633       /* set defaults */
634       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
635 
636       /* set blocks for ASM smoother that uses the 'aggregates' */
637       if (pc_gamg->use_aggs_in_asm) {
638         PetscInt sz;
639         IS       *iss;
640 
641         sz   = nASMBlocksArr[level];
642         iss  = ASMLocalIDsArr[level];
643         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
644         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
645         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
646         if (!sz) {
647           IS       is;
648           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
649           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
650           ierr = ISDestroy(&is);CHKERRQ(ierr);
651         } else {
652           PetscInt kk;
653           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
654           for (kk=0; kk<sz; kk++) {
655             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
656           }
657           ierr = PetscFree(iss);CHKERRQ(ierr);
658         }
659         ASMLocalIDsArr[level] = NULL;
660         nASMBlocksArr[level]  = 0;
661       } else {
662         ierr = PCSetType(subpc, PCSOR);CHKERRQ(ierr);
663       }
664     }
665     {
666       /* coarse grid */
667       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
668       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
669       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
670       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
671       if (!pc_gamg->use_parallel_coarse_grid_solver) {
672         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
673         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
674         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
675         ierr = PCSetUp(subpc);CHKERRQ(ierr);
676         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
677         if (ii != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
678         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
679         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
680         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
681         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
682         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
683         /* This flag gets reset by PCBJacobiGetSubKSP(), but our BJacobi really does the same algorithm everywhere (and in
684          * fact, all but one process will have zero dofs), so we reset the flag to avoid having PCView_BJacobi attempt to
685          * view every subdomain as though they were different. */
686         ((PC_BJacobi*)subpc->data)->same_local_solves = PETSC_TRUE;
687       }
688     }
689 
690     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
691     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
692     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
693     ierr = PetscOptionsEnd();CHKERRQ(ierr);
694     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
695 
696     /* clean up */
697     for (level=1; level<pc_gamg->Nlevels; level++) {
698       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
699       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
700     }
701     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
702   } else {
703     KSP smoother;
704     ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
705     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
706     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
707     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
708     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
709   }
710   PetscFunctionReturn(0);
711 }
712 
713 /* ------------------------------------------------------------------------- */
714 /*
715  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
716    that was created with PCCreate_GAMG().
717 
718    Input Parameter:
719 .  pc - the preconditioner context
720 
721    Application Interface Routine: PCDestroy()
722 */
723 PetscErrorCode PCDestroy_GAMG(PC pc)
724 {
725   PetscErrorCode ierr;
726   PC_MG          *mg     = (PC_MG*)pc->data;
727   PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;
728 
729   PetscFunctionBegin;
730   ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
731   if (pc_gamg->ops->destroy) {
732     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
733   }
734   ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
735   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
736   ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
737   ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 /*@
742    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
743 
744    Logically Collective on PC
745 
746    Input Parameters:
747 +  pc - the preconditioner context
748 -  n - the number of equations
749 
750 
751    Options Database Key:
752 .  -pc_gamg_process_eq_limit <limit>
753 
754    Notes:
755     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
756           that has degrees of freedom
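
    For example (an illustrative value),
$     -pc_gamg_process_eq_limit 1000
    aims for roughly 1000 equations per active process on the coarser grids.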
757 
758    Level: intermediate
759 
760 .seealso: PCGAMGSetCoarseEqLim()
761 @*/
762 PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
763 {
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
768   ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
773 {
774   PC_MG   *mg      = (PC_MG*)pc->data;
775   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
776 
777   PetscFunctionBegin;
778   if (n>0) pc_gamg->min_eq_proc = n;
779   PetscFunctionReturn(0);
780 }
781 
782 /*@
783    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
784 
785  Collective on PC
786 
787    Input Parameters:
788 +  pc - the preconditioner context
789 -  n - maximum number of equations to aim for
790 
791    Options Database Key:
792 .  -pc_gamg_coarse_eq_limit <limit>
793 
794    Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
795      has fewer than 1000 unknowns.
796 
797    Level: intermediate
798 
799 .seealso: PCGAMGSetProcEqLim()
800 @*/
801 PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
802 {
803   PetscErrorCode ierr;
804 
805   PetscFunctionBegin;
806   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
807   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
808   PetscFunctionReturn(0);
809 }
810 
811 static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
812 {
813   PC_MG   *mg      = (PC_MG*)pc->data;
814   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
815 
816   PetscFunctionBegin;
817   if (n>0) pc_gamg->coarse_eq_limit = n;
818   PetscFunctionReturn(0);
819 }
820 
821 /*@
822    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
823 
824    Collective on PC
825 
826    Input Parameters:
827 +  pc - the preconditioner context
828 -  n - PETSC_TRUE or PETSC_FALSE
829 
830    Options Database Key:
831 .  -pc_gamg_repartition <true,false>
832 
833    Notes:
834     this will generally improve the load balancing of the work on each level
835 
836    Level: intermediate
837 
838 .seealso: ()
839 @*/
840 PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
841 {
842   PetscErrorCode ierr;
843 
844   PetscFunctionBegin;
845   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
846   ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
847   PetscFunctionReturn(0);
848 }
849 
850 static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
851 {
852   PC_MG   *mg      = (PC_MG*)pc->data;
853   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
854 
855   PetscFunctionBegin;
856   pc_gamg->repart = n;
857   PetscFunctionReturn(0);
858 }
859 
860 /*@
861    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
862 
863    Collective on PC
864 
865    Input Parameters:
866 +  pc - the preconditioner context
867 -  n - PETSC_TRUE or PETSC_FALSE
868 
869    Options Database Key:
870 .  -pc_gamg_reuse_interpolation <true,false>
871 
872    Level: intermediate
873 
874    Notes:
875     this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
876           the preconditioner to be rebuilt more quickly.
877 
878 .seealso: ()
879 @*/
880 PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
881 {
882   PetscErrorCode ierr;
883 
884   PetscFunctionBegin;
885   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
886   ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
891 {
892   PC_MG   *mg      = (PC_MG*)pc->data;
893   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
894 
895   PetscFunctionBegin;
896   pc_gamg->reuse_prol = n;
897   PetscFunctionReturn(0);
898 }
899 
900 /*@
901    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
902 
903    Collective on PC
904 
905    Input Parameters:
906 +  pc - the preconditioner context
907 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
908 
909    Options Database Key:
910 .  -pc_gamg_asm_use_agg
911 
912    Level: intermediate
913 
914 .seealso: ()
915 @*/
916 PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
917 {
918   PetscErrorCode ierr;
919 
920   PetscFunctionBegin;
921   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
922   ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
923   PetscFunctionReturn(0);
924 }
925 
926 static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
927 {
928   PC_MG   *mg      = (PC_MG*)pc->data;
929   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
930 
931   PetscFunctionBegin;
932   pc_gamg->use_aggs_in_asm = flg;
933   PetscFunctionReturn(0);
934 }
935 
936 /*@
937    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
938 
939    Collective on PC
940 
941    Input Parameters:
942 +  pc - the preconditioner context
943 -  flg - PETSC_TRUE to not force coarse grid onto one processor
944 
945    Options Database Key:
946 .  -pc_gamg_use_parallel_coarse_grid_solver
947 
948    Level: intermediate
949 
950 .seealso: ()
951 @*/
952 PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
953 {
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
958   ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
963 {
964   PC_MG   *mg      = (PC_MG*)pc->data;
965   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
966 
967   PetscFunctionBegin;
968   pc_gamg->use_parallel_coarse_grid_solver = flg;
969   PetscFunctionReturn(0);
970 }
971 
972 /*@
973    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
974 
975    Collective on PC
976 
977    Input Parameters:
978 +  pc - the preconditioner context
979 -  flg - PETSC_TRUE to pin coarse grids to CPU
980 
981    Options Database Key:
982 .  -pc_gamg_cpu_pin_coarse_grids
983 
984    Level: intermediate
985 
986 .seealso: ()
987 @*/
988 PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
989 {
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
994   ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
995   PetscFunctionReturn(0);
996 }
997 
998 static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
999 {
1000   PC_MG   *mg      = (PC_MG*)pc->data;
1001   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1002 
1003   PetscFunctionBegin;
1004   pc_gamg->cpu_pin_coarse_grids = flg;
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 /*@
1009    PCGAMGSetCoarseGridLayoutType - place reduced grids on processes in natural order (compact type)
1010 
1011    Collective on PC
1012 
1013    Input Parameters:
1014 +  pc - the preconditioner context
1015 -  flg - Layout type
1016 
1017    Options Database Key:
1018 .  -pc_gamg_coarse_grid_layout_type
1019 
1020    Level: intermediate
1021 
1022 .seealso: ()
1023 @*/
1024 PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
1025 {
1026   PetscErrorCode ierr;
1027 
1028   PetscFunctionBegin;
1029   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1030   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
1035 {
1036   PC_MG   *mg      = (PC_MG*)pc->data;
1037   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1038 
1039   PetscFunctionBegin;
1040   pc_gamg->layout_type = flg;
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 /*@
1045    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1046 
1047    Not collective on PC
1048 
1049    Input Parameters:
1050 +  pc - the preconditioner
1051 -  n - the maximum number of levels to use
1052 
1053    Options Database Key:
1054 .  -pc_mg_levels
1055 
1056    Level: intermediate
1057 
1058 .seealso: ()
1059 @*/
1060 PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
1061 {
1062   PetscErrorCode ierr;
1063 
1064   PetscFunctionBegin;
1065   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1066   ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1071 {
1072   PC_MG   *mg      = (PC_MG*)pc->data;
1073   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1074 
1075   PetscFunctionBegin;
1076   pc_gamg->Nlevels = n;
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 /*@
1081    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1082 
1083    Not collective on PC
1084 
1085    Input Parameters:
1086 +  pc - the preconditioner context
1087 .  v - array of threshold values for the finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
-  n - number of threshold values provided in v
1088 
1089    Options Database Key:
1090 .  -pc_gamg_threshold <threshold>
1091 
1092    Notes:
1093     Increasing the threshold decreases the rate of coarsening. Conversely, reducing the threshold increases the rate of coarsening (aggressive coarsening), thereby reducing the complexity of the coarse grids, but generally resulting in slower solver convergence rates. Reducing coarse grid complexity also reduces the cost of constructing the Galerkin coarse grid operators considerably.
1094     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, thereby reducing the coupling in the graph and giving a different (perhaps better) coarser set of points.
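
    For example (illustrative values, finest level first),
$     -pc_gamg_threshold 0.01          use 0.01 on every level (with the default -pc_gamg_threshold_scale of 1)
$     -pc_gamg_threshold 0.02,0.01     use 0.02 on the finest level and 0.01 on the next; remaining levels are filled in using -pc_gamg_threshold_scale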
1095 
1096    Level: intermediate
1097 
1098 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1099 @*/
1100 PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
1101 {
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1106   ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1111 {
1112   PC_MG   *mg      = (PC_MG*)pc->data;
1113   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1114   PetscInt i;
1115   PetscFunctionBegin;
1116   for (i=0;i<n;i++) pc_gamg->threshold[i] = v[i];
1117   do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_GAMG_MAXLEVELS);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 /*@
1122    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1123 
1124    Not collective on PC
1125 
1126    Input Parameters:
1127 +  pc - the preconditioner context
1128 -  scale - the threshold value reduction, usually < 1.0
1129 
1130    Options Database Key:
1131 .  -pc_gamg_threshold_scale <v>
1132 
1133    Level: advanced
1134 
1135 .seealso: ()
1136 @*/
1137 PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
1138 {
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1143   ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1148 {
1149   PC_MG   *mg      = (PC_MG*)pc->data;
1150   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1151   PetscFunctionBegin;
1152   pc_gamg->threshold_scale = v;
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 /*@C
1157    PCGAMGSetType - Set solution method
1158 
1159    Collective on PC
1160 
1161    Input Parameters:
1162 +  pc - the preconditioner context
1163 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1164 
1165    Options Database Key:
1166 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1167 
1168    Level: intermediate
1169 
1170 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1171 @*/
1172 PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
1173 {
1174   PetscErrorCode ierr;
1175 
1176   PetscFunctionBegin;
1177   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1178   ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 /*@C
1183    PCGAMGGetType - Get solution method
1184 
1185    Collective on PC
1186 
1187    Input Parameter:
1188 .  pc - the preconditioner context
1189 
1190    Output Parameter:
1191 .  type - the type of algorithm used
1192 
1193    Level: intermediate
1194 
1195 .seealso: PCGAMGSetType(), PCGAMGType
1196 @*/
1197 PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
1198 {
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1203   ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1208 {
1209   PC_MG          *mg      = (PC_MG*)pc->data;
1210   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1211 
1212   PetscFunctionBegin;
1213   *type = pc_gamg->type;
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
1218 {
1219   PetscErrorCode ierr,(*r)(PC);
1220   PC_MG          *mg      = (PC_MG*)pc->data;
1221   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1222 
1223   PetscFunctionBegin;
1224   pc_gamg->type = type;
1225   ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
1226   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
1227   if (pc_gamg->ops->destroy) {
1228     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
1229     ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
1230     pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1231     /* cleaning up common data in pc_gamg - this should disappear someday */
1232     pc_gamg->data_cell_cols = 0;
1233     pc_gamg->data_cell_rows = 0;
1234     pc_gamg->orig_data_cell_cols = 0;
1235     pc_gamg->orig_data_cell_rows = 0;
1236     ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
1237     pc_gamg->data_sz = 0;
1238   }
1239   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
1240   ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
1241   ierr = (*r)(pc);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 /* -------------------------------------------------------------------------- */
1246 /*
1247    PCMGGetGridComplexity - compute coarse grid complexity of MG hierarchy
1248 
1249    Input Parameter:
1250 .  pc - the preconditioner context
1251 
1252    Output Parameter:
1253 .  gc - grid complexity = sum_i(nnz_i) / nnz_0
1254 
1255    Level: advanced
1256 */
1257 static PetscErrorCode PCMGGetGridComplexity(PC pc, PetscReal *gc)
1258 {
1259   PetscErrorCode ierr;
1260   PC_MG          *mg      = (PC_MG*)pc->data;
1261   PC_MG_Levels   **mglevels = mg->levels;
1262   PetscInt       lev, nnz0 = -1;
1263   MatInfo        info;
1264   PetscFunctionBegin;
1265   if (!mg->nlevels) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MG has no levels");
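  /* sum nonzeros over all levels; level nlevels-1 is the finest grid, whose nonzero count normalizes the sum */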
1266   for (lev=0, *gc=0; lev<mg->nlevels; lev++) {
1267     Mat dB;
1268     ierr = KSPGetOperators(mglevels[lev]->smoothd,NULL,&dB);CHKERRQ(ierr);
1269     ierr = MatGetInfo(dB,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
1270     *gc += (PetscReal)info.nz_used;
1271     if (lev==mg->nlevels-1) nnz0 = info.nz_used;
1272   }
1273   if (nnz0) *gc /= (PetscReal)nnz0;
1274   else *gc = 0;
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1279 {
1280   PetscErrorCode ierr; PetscInt i;
1281   PC_MG          *mg      = (PC_MG*)pc->data;
1282   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1283   PetscReal       gc=0;
1284   PetscFunctionBegin;
1285   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1286   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1287   for (i=0;i<pc_gamg->current_level;i++) {
1288     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1289   }
1290   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1291   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1292   if (pc_gamg->use_aggs_in_asm) {
1293     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1294   }
1295   if (pc_gamg->use_parallel_coarse_grid_solver) {
1296     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1297   }
1298 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1299   if (pc_gamg->cpu_pin_coarse_grids) {
1300     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1301   }
1302 #endif
1303   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1304   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1305   /* } else { */
1306   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1307   /* } */
1308   if (pc_gamg->ops->view) {
1309     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1310   }
1311   ierr = PCMGGetGridComplexity(pc,&gc);CHKERRQ(ierr);
1312   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g\n",gc);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1317 {
1318   PetscErrorCode ierr;
1319   PC_MG          *mg      = (PC_MG*)pc->data;
1320   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1321   PetscBool      flag;
1322   MPI_Comm       comm;
1323   char           prefix[256];
1324   PetscInt       i,n;
1325   const char     *pcpre;
1326   PetscFunctionBegin;
1327   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1328   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1329   {
1330     char tname[256];
1331     ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1332     if (flag) {
1333       ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1334     }
1335     ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1336     ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1337     ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1338     ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1339     ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1340     i = (PetscInt)PCGAMG_LAYOUT_COMPACT;
1341     ierr = PetscOptionsInt("-pc_gamg_coarse_grid_layout_type","0: place reduced grids on processes in natural order; 1: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",i,&i,NULL);CHKERRQ(ierr);
1342     pc_gamg->layout_type = (PCGAMGLayoutType)i;
1343     ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1344     ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1345     ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of the threshold for each coarser level when not individually specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1346     n = PETSC_GAMG_MAXLEVELS;
1347     ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
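         /* thresholds not given on the command line are filled in below by scaling the previous level's value by threshold_scale */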
1348     if (!flag || n < PETSC_GAMG_MAXLEVELS) {
1349       if (!flag || n < 1) n = 1; /* only the finest-level default is set; also guards against an empty option array */
1350       for (i=n; i<PETSC_GAMG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;
1351     }
1353     ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1354 
1355     /* set options for subtype */
1356     if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1357   }
1358   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1359   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1360   ierr = PetscOptionsTail();CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 /* -------------------------------------------------------------------------- */
1365 /*MC
1366      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1367 
1368    Options Database Keys:
1369 +   -pc_gamg_type <type> - one of agg, geo, or classical
1370 .   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
1371 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
1372 .   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1373 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1374                                         equations on each process that has degrees of freedom
1375 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1376 .   -pc_gamg_threshold[] <thresh,default=0> - Relative threshold below which GAMG drops small values from the graph on each level before aggregating
1377 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling applied to the threshold on each successively coarser grid when it is not individually specified (see the example below)
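
     For example (illustrative values only), -pc_gamg_threshold 0.01 -pc_gamg_threshold_scale .5 drops weak graph edges below a relative threshold of 0.01 on the finest level, 0.005 on the next level, 0.0025 on the one after that, and so on.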
1378 
1379    Options Database Keys for default Aggregation:
1380 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smoothed aggregation
1381 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1382 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1383 
1384    Multigrid options:
1385 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1386 .  -pc_mg_distinct_smoothup - configure the up (post) and down (pre) smoothers separately, see PCMGSetDistinctSmoothUp()
1387 .  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kaskade
1388 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1389 
1390 
1391   Notes:
1392     To obtain good performance from PCGAMG for vector-valued problems you must
1393        call MatSetBlockSize() to indicate the number of degrees of freedom per grid point, and
1394        call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator.
1395        See the Users Manual Chapter 4 for more details, and the sketch below.
1396 
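   Example usage (a minimal sketch, not a complete program; error checking is omitted, and A, b, x, and coords are assumed to be an assembled Mat, two Vecs, and a Vec of nodal coordinates supplied by the application):
.vb
   KSP          ksp;
   PC           pc;
   MatNullSpace nearnullsp;

   KSPCreate(PETSC_COMM_WORLD,&ksp);
   KSPSetOperators(ksp,A,A);
   MatSetBlockSize(A,3);                             /* e.g. 3 dof per grid point for 3D elasticity; normally set before assembly */
   MatNullSpaceCreateRigidBody(coords,&nearnullsp);  /* near null space (rigid body modes) built from the coordinates */
   MatSetNearNullSpace(A,nearnullsp);
   MatNullSpaceDestroy(&nearnullsp);
   KSPGetPC(ksp,&pc);
   PCSetType(pc,PCGAMG);
   KSPSetFromOptions(ksp);                           /* picks up the -pc_gamg_* options listed above */
   KSPSolve(ksp,b,x);
.ve
   At run time, for example, -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_threshold 0.02 selects smoothed aggregation with a drop threshold of 0.02 on the finest level.
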
1397   Level: intermediate
1398 
1399 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1400            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType()
1401 M*/
1402 
1403 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1404 {
1405   PetscErrorCode ierr;
       PetscInt       i;
1406   PC_GAMG        *pc_gamg;
1407   PC_MG          *mg;
1408 
1409   PetscFunctionBegin;
1410    /* register AMG type */
1411   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1412 
1413   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1414   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1415   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1416 
1417   /* create a supporting struct and attach it to pc */
1418   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1419   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1420   mg           = (PC_MG*)pc->data;
1421   mg->innerctx = pc_gamg;
1422 
1423   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1424 
1425   pc_gamg->setup_count = 0;
1426   /* these should be in subctx but repartitioning needs simple arrays */
1427   pc_gamg->data_sz = 0;
1428   pc_gamg->data    = 0;
1429 
1430   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1431   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1432   pc->ops->setup          = PCSetUp_GAMG;
1433   pc->ops->reset          = PCReset_GAMG;
1434   pc->ops->destroy        = PCDestroy_GAMG;
1435   mg->view                = PCView_GAMG;
1436 
1437   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1445   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1446   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1447   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1448   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1449   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1450   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1451   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1452   pc_gamg->repart           = PETSC_FALSE;
1453   pc_gamg->reuse_prol       = PETSC_FALSE;
1454   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1455   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1456   pc_gamg->cpu_pin_coarse_grids = PETSC_TRUE;
1457   pc_gamg->layout_type      = PCGAMG_LAYOUT_COMPACT;
1458   pc_gamg->min_eq_proc      = 50;
1459   pc_gamg->coarse_eq_limit  = 50;
1460   for (i=0;i<PETSC_GAMG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1461   pc_gamg->threshold_scale = 1.;
1462   pc_gamg->Nlevels          = PETSC_GAMG_MAXLEVELS;
1463   pc_gamg->current_level    = 0; /* don't need to init really */
1464   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1465 
1466   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1467   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 /*@C
1472  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1473     from PCInitializePackage().
1474 
1475  Level: developer
1476 
1477  .seealso: PetscInitialize()
1478 @*/
1479 PetscErrorCode PCGAMGInitializePackage(void)
1480 {
1481   PetscErrorCode ierr;
1482 
1483   PetscFunctionBegin;
1484   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1485   PCGAMGPackageInitialized = PETSC_TRUE;
1486   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1487   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1488   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1489   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1490 
1491   /* general events */
1492   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", PC_CLASSID, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1493   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1494   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1495   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1496   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1497   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1498   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1499 
1500 #if defined PETSC_GAMG_USE_LOG
1501   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1502   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1503   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1504   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1505   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1506   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1507   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1508   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1509   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1510   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1511   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1512   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1513   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1514   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1515   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1516   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1517   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1518 
1519   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1520   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1521   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1522   /* create timer stages */
1523 #if defined GAMG_STAGES
1524   {
1525     char     str[32];
1526     PetscInt lidx;
1527     sprintf(str,"MG Level %d (finest)",0);
1528     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1529     for (lidx=1; lidx<9; lidx++) {
1530       sprintf(str,"MG Level %d",(int)lidx);
1531       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1532     }
1533   }
1534 #endif
1535 #endif
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 /*@C
1540  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1541     called from PetscFinalize() automatically.
1542 
1543  Level: developer
1544 
1545  .seealso: PetscFinalize()
1546 @*/
1547 PetscErrorCode PCGAMGFinalizePackage(void)
1548 {
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBegin;
1552   PCGAMGPackageInitialized = PETSC_FALSE;
1553   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /*@C
1558  PCGAMGRegister - Register a PCGAMG implementation.
1559 
1560  Input Parameters:
1561  + type - string that will be used as the name of the GAMG type.
1562  - create - function that creates the GAMG context.
1563 
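 Sample usage (a sketch; PCCreateGAMG_MyAMG stands for a hypothetical user-provided creation routine):
.vb
   PCGAMGRegister("myamg",PCCreateGAMG_MyAMG);
.ve
 The registered method can then be selected at run time with -pc_gamg_type myamg.
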
1564   Level: advanced
1565 
1566  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1567 @*/
1568 PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
1569 {
1570   PetscErrorCode ierr;
1571 
1572   PetscFunctionBegin;
1573   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1574   ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 
1578