xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision 1873fc9fedfc3bd8fc2f82ccd29ab8b44a80f1db)
/*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
 */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/pc/impls/bjacobi/bjacobi.h> /* Hack to access same_local_solves */
7 
8 #if defined PETSC_GAMG_USE_LOG
9 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
10 #endif
11 
12 #if defined PETSC_USE_LOG
13 PetscLogEvent PC_GAMGGraph_AGG;
14 PetscLogEvent PC_GAMGGraph_GEO;
15 PetscLogEvent PC_GAMGCoarsen_AGG;
16 PetscLogEvent PC_GAMGCoarsen_GEO;
17 PetscLogEvent PC_GAMGProlongator_AGG;
18 PetscLogEvent PC_GAMGProlongator_GEO;
19 PetscLogEvent PC_GAMGOptProlongator_AGG;
20 #endif
21 
22 /* #define GAMG_STAGES */
23 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
24 static PetscLogStage gamg_stages[PETSC_GAMG_MAXLEVELS];
25 #endif
26 
27 static PetscFunctionList GAMGList = 0;
28 static PetscBool PCGAMGPackageInitialized;
29 
30 /* ----------------------------------------------------------------------------- */
31 PetscErrorCode PCReset_GAMG(PC pc)
32 {
33   PetscErrorCode ierr;
34   PC_MG          *mg      = (PC_MG*)pc->data;
35   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
36 
37   PetscFunctionBegin;
38   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
39   pc_gamg->data_sz = 0;
40   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
/* -------------------------------------------------------------------------- */
/*
   PCGAMGCreateLevel_GAMG: create the coarse operator with RAP; repartition and/or
     reduce the number of active processes.

   Input Parameters:
   . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
          'pc_gamg->data_sz' are changed via repartitioning/reduction
   . Amat_fine - matrix on this fine (k) level
   . cr_bs - coarse block size
   . is_last - this is the coarsest level (reduce to one process unless a
               parallel coarse-grid solver was requested)
   Input/Output Parameters:
   . a_P_inout - prolongation operator to the next level (k-->k-1)
   . a_nactive_proc - number of active procs
   Output Parameters:
   . a_Amat_crs - coarse matrix that is created (k-1)
   . Pcolumnperm - if non-NULL, set to the new column permutation of P
                   (NULL when no repartitioning/reduction occurs)
*/
60 
61 static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
62 {
63   PetscErrorCode  ierr;
64   PC_MG           *mg         = (PC_MG*)pc->data;
65   PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
66   Mat             Cmat,Pold=*a_P_inout;
67   MPI_Comm        comm;
68   PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
69   PetscInt        ncrs_eq,ncrs,f_bs;
70 
71   PetscFunctionBegin;
72   ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
73   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
74   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
75   ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
76   ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
77 
78   if (Pcolumnperm) *Pcolumnperm = NULL;
79 
80   /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
81   ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
82   if (pc_gamg->data_cell_rows>0) {
83     ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
84   } else {
85     PetscInt  bs;
86     ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
87     ncrs = ncrs_eq/bs;
88   }
89   /* get number of PEs to make active 'new_size', reduce, can be any integer 1-P */
90   if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) new_size = 1;
91   else {
92     PetscInt ncrs_eq_glob;
93     ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
94     new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
95     if (!new_size) new_size = 1; /* not likely, posible? */
96     else if (new_size >= nactive) new_size = nactive; /* no change, rare */
97   }
98 
99   if (new_size==nactive) {
100     *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
101     if (new_size < size) {
102       /* odd case where multiple coarse grids are on one processor or no coarsening ... */
103       ierr = PetscInfo1(pc,"reduced grid using same number of processors (%d) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
104       if (pc_gamg->cpu_pin_coarse_grids) {
105         ierr = MatPinToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
106         ierr = MatPinToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
107       }
108     }
109     /* we know that the grid structure can be reused in MatPtAP */
110   } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
111     PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
112     IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
113     nloc_old = ncrs_eq/cr_bs;
114     if (ncrs_eq % cr_bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
115     /* get new_size and rfactor */
116     if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
117       /* find factor */
118       if (new_size == 1) rfactor = size; /* don't modify */
119       else {
120         PetscReal best_fact = 0.;
121         jj = -1;
122         for (kk = 1 ; kk <= size ; kk++) {
123           if (!(size%kk)) { /* a candidate */
124             PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
125             if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
126             if (fact > best_fact) {
127               best_fact = fact; jj = kk;
128             }
129           }
130         }
131         if (jj != -1) rfactor = jj;
132         else rfactor = 1; /* a prime */
133         if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
134         else expand_factor = rfactor;
135       }
136       new_size = size/rfactor; /* make new size one that is factor */
137       if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
138         *a_Amat_crs = Cmat;
139         ierr = PetscInfo2(pc,"Finding factorable processor set stopped reduction: new_size=%d, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
140         PetscFunctionReturn(0);
141       }
142     }
143 #if defined PETSC_GAMG_USE_LOG
144     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
145 #endif
146     /* make 'is_eq_newproc' */
147     ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
148     if (pc_gamg->repart) {
149       /* Repartition Cmat_{k} and move colums of P^{k}_{k-1} and coordinates of primal part accordingly */
150       Mat      adj;
151       ierr = PetscInfo4(pc,"Repartition: size (active): %d --> %d, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
152       /* get 'adj' */
153       if (cr_bs == 1) {
154         ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
155       } else {
156         /* make a scalar matrix to partition (no Stokes here) */
157         Mat               tMat;
158         PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
159         const PetscScalar *vals;
160         const PetscInt    *idx;
161         PetscInt          *d_nnz, *o_nnz, M, N;
162         static PetscInt   llev = 0; /* ugly but just used for debugging */
163         MatType           mtype;
164 
165         ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
166         ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
167         ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
168         for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
169           ierr      = MatGetRow(Cmat,Ii,&ncols,0,0);CHKERRQ(ierr);
170           d_nnz[jj] = ncols/cr_bs;
171           o_nnz[jj] = ncols/cr_bs;
172           ierr      = MatRestoreRow(Cmat,Ii,&ncols,0,0);CHKERRQ(ierr);
173           if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
174           if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
175         }
176 
177         ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
178         ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
179         ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
180         ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
181         ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
182         ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
183         ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
184 
185         for (ii = Istart_crs; ii < Iend_crs; ii++) {
186           PetscInt dest_row = ii/cr_bs;
187           ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
188           for (jj = 0; jj < ncols; jj++) {
189             PetscInt    dest_col = idx[jj]/cr_bs;
190             PetscScalar v        = 1.0;
191             ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
192           }
193           ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
194         }
195         ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
196         ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
197 
198         if (llev++ == -1) {
199           PetscViewer viewer; char fname[32];
200           ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
201           PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);
202           ierr = MatView(tMat, viewer);CHKERRQ(ierr);
203           ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
204         }
205         ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
206         ierr = MatDestroy(&tMat);CHKERRQ(ierr);
207       } /* create 'adj' */
208 
209       { /* partition: get newproc_idx */
210         char            prefix[256];
211         const char      *pcpre;
212         const PetscInt  *is_idx;
213         MatPartitioning mpart;
214         IS              proc_is;
215 
216         ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
217         ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
218         ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
219         ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
220         ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
221         ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
222         ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
223         ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
224         ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
225 
226         /* collect IS info */
227         ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
228         ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
229         for (kk = jj = 0 ; kk < nloc_old ; kk++) {
230           for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
231             newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
232           }
233         }
234         ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
235         ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
236       }
237       ierr = MatDestroy(&adj);CHKERRQ(ierr);
238 
239       ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
240       ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
241     } else { /* simple aggregation of parts -- 'is_eq_newproc' */
242       PetscInt targetPE;
243       if (new_size==nactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
244       ierr = PetscInfo1(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
245       targetPE = (rank/rfactor)*expand_factor;
246       ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
247     } /* end simple 'is_eq_newproc' */
248 
249     /*
250       Create an index set from the is_eq_newproc index set to indicate the mapping TO
251     */
252     ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
253     is_eq_num_prim = is_eq_num;
254     /*
255       Determine how many equations/vertices are assigned to each processor
256     */
257     ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
258     ncrs_eq_new = counts[rank];
259     ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
260     ncrs_new = ncrs_eq_new/cr_bs;
261 
262     ierr = PetscFree(counts);CHKERRQ(ierr);
263     /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxilary data into some complex abstracted thing */
264     {
265       Vec            src_crd, dest_crd;
266       const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
267       VecScatter     vecscat;
268       PetscScalar    *array;
269       IS isscat;
270       /* move data (for primal equations only) */
271       /* Create a vector to contain the newly ordered element information */
272       ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
273       ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
274       ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
275       /*
276 	There are 'ndata_rows*ndata_cols' data items per node, (one can think of the vectors of having
277 	a block size of ...).  Note, ISs are expanded into equation space by 'cr_bs'.
278       */
279       ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
280       ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
281       for (ii=0,jj=0; ii<ncrs; ii++) {
282 	PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
283 	for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
284       }
285       ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
286       ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
287       ierr = PetscFree(tidx);CHKERRQ(ierr);
288       /*
289 	Create a vector to contain the original vertex information for each element
290       */
291       ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
292       for (jj=0; jj<ndata_cols; jj++) {
293 	const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
294 	for (ii=0; ii<ncrs; ii++) {
295 	  for (kk=0; kk<ndata_rows; kk++) {
296 	    PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
297 	    PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
298 	    ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
299 	  }
300 	}
301       }
302       ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
303       ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
304       /*
305 	Scatter the element vertex information (still in the original vertex ordering)
306 	to the correct processor
307       */
308       ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
309       ierr = ISDestroy(&isscat);CHKERRQ(ierr);
310       ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
311       ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
312       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
313       ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
314       /*
315 	Put the element vertex data into a new allocation of the gdata->ele
316       */
317       ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
318       ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);
319 
320       pc_gamg->data_sz = node_data_sz*ncrs_new;
321       strideNew        = ncrs_new*ndata_rows;
322 
323       ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
324       for (jj=0; jj<ndata_cols; jj++) {
325 	for (ii=0; ii<ncrs_new; ii++) {
326 	  for (kk=0; kk<ndata_rows; kk++) {
327 	    PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
328 	    pc_gamg->data[ix] = PetscRealPart(array[jx]);
329 	  }
330 	}
331       }
332       ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
333       ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
334     }
335     /* move A and P (columns) with new layout */
336 #if defined PETSC_GAMG_USE_LOG
337     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
338 #endif
339     /*
340       Invert for MatCreateSubMatrix
341     */
342     ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
343     ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
344     ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
345     if (is_eq_num != is_eq_num_prim) {
346       ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
347     }
348     if (Pcolumnperm) {
349       ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
350       *Pcolumnperm = new_eq_indices;
351     }
352     ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
353 #if defined PETSC_GAMG_USE_LOG
354     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
355     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
356 #endif
357     /* 'a_Amat_crs' output */
358     {
359       Mat mat;
360       ierr        = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
361       *a_Amat_crs = mat;
362     }
363     ierr = MatDestroy(&Cmat);CHKERRQ(ierr);
364 
365 #if defined PETSC_GAMG_USE_LOG
366     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
367 #endif
368     /* prolongator */
369     {
370       IS       findices;
371       PetscInt Istart,Iend;
372       Mat      Pnew;
373 
374       ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
375 #if defined PETSC_GAMG_USE_LOG
376       ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
377 #endif
378       ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
379       ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
380       ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
381       ierr = ISDestroy(&findices);CHKERRQ(ierr);
382 
383 #if defined PETSC_GAMG_USE_LOG
384       ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
385 #endif
386       ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);
387 
388       /* output - repartitioned */
389       *a_P_inout = Pnew;
390     }
391     ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);
392 
393     *a_nactive_proc = new_size; /* output */
394 
395     /* pinning on reduced grids, not a bad heuristic and optimization gets folded into process reduction optimization */
396     if (pc_gamg->cpu_pin_coarse_grids) {
397 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
398       static PetscInt llev = 2;
399       ierr = PetscInfo1(pc,"Pinning level %D to the CPU\n",llev++);CHKERRQ(ierr);
400 #endif
401       ierr = MatPinToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
402       ierr = MatPinToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
403       if (1) { /* lvec is created, need to pin it, this is done in MatSetUpMultiply_MPIAIJ. Hack */
404         Mat         A = *a_Amat_crs, P = *a_P_inout;
405         PetscMPIInt size;
406         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
407         if (size > 1) {
408           Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
409           ierr = VecPinToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
410           ierr = VecPinToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
411         }
412       }
413     }
414 #if defined PETSC_GAMG_USE_LOG
415     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
416 #endif
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 /* -------------------------------------------------------------------------- */
422 /*
423    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
424                     by setting data structures and options.
425 
426    Input Parameter:
427 .  pc - the preconditioner context
428 
429 */
430 PetscErrorCode PCSetUp_GAMG(PC pc)
431 {
432   PetscErrorCode ierr;
433   PC_MG          *mg      = (PC_MG*)pc->data;
434   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
435   Mat            Pmat     = pc->pmat;
436   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_GAMG_MAXLEVELS];
437   MPI_Comm       comm;
438   PetscMPIInt    rank,size,nactivepe;
439   Mat            Aarr[PETSC_GAMG_MAXLEVELS],Parr[PETSC_GAMG_MAXLEVELS];
440   IS             *ASMLocalIDsArr[PETSC_GAMG_MAXLEVELS];
441   PetscLogDouble nnz0=0.,nnztot=0.;
442   MatInfo        info;
443   PetscBool      is_last = PETSC_FALSE;
444 
445   PetscFunctionBegin;
446   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
447   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
448   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
449 
450   if (pc_gamg->setup_count++ > 0) {
451     if ((PetscBool)(!pc_gamg->reuse_prol)) {
452       /* reset everything */
453       ierr = PCReset_MG(pc);CHKERRQ(ierr);
454       pc->setupcalled = 0;
455     } else {
456       PC_MG_Levels **mglevels = mg->levels;
457       /* just do Galerkin grids */
458       Mat          B,dA,dB;
459 
460       if (!pc->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PCSetUp() has not been called yet");
461       if (pc_gamg->Nlevels > 1) {
462         /* currently only handle case where mat and pmat are the same on coarser levels */
463         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
464         /* (re)set to get dirty flag */
465         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
466 
467         for (level=pc_gamg->Nlevels-2; level>=0; level--) {
468           /* 2nd solve, matrix structure can change from repartitioning or process reduction but don't know if we have process reduction here. Should fix */
469           if (pc_gamg->setup_count==2 /* && pc_gamg->repart||reduction */) {
470             ierr = PetscInfo2(pc,"new RAP after first solve level %D, %D setup\n",level,pc_gamg->setup_count);CHKERRQ(ierr);
471             ierr = MatPtAP(dB,mglevels[level+1]->interpolate,MAT_INITIAL_MATRIX,2.0,&B);CHKERRQ(ierr);
472             ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr);
473             mglevels[level]->A = B;
474           } else {
475             ierr = PetscInfo2(pc,"RAP after first solve reusing matrix level %D, %D setup\n",level,pc_gamg->setup_count);CHKERRQ(ierr);
476             ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
477             ierr = MatPtAP(dB,mglevels[level+1]->interpolate,MAT_REUSE_MATRIX,1.0,&B);CHKERRQ(ierr);
478           }
479           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
480           dB   = B;
481         }
482       }
483 
484       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
485       PetscFunctionReturn(0);
486     }
487   }
488 
489   if (!pc_gamg->data) {
490     if (pc_gamg->orig_data) {
491       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
492       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
493 
494       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
495       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
496       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
497 
498       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
499       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
500     } else {
501       if (!pc_gamg->ops->createdefaultdata) SETERRQ(comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
502       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
503     }
504   }
505 
506   /* cache original data for reuse */
507   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
508     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
509     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
510     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
511     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
512   }
513 
514   /* get basic dims */
515   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
516   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
517 
518   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
519   nnz0   = info.nz_used;
520   nnztot = info.nz_used;
521   ierr = PetscInfo6(pc,"level %d) N=%D, n data rows=%d, n data cols=%d, nnz/row (ave)=%d, np=%d\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
522 
523   /* Get A_i and R_i */
524   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
525     pc_gamg->current_level = level;
526     level1 = level + 1;
527 #if defined PETSC_GAMG_USE_LOG
528     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
529 #if (defined GAMG_STAGES)
530     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
531 #endif
532 #endif
533     { /* construct prolongator */
534       Mat              Gmat;
535       PetscCoarsenData *agg_lists;
536       Mat              Prol11;
537 
538       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
539       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
540       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
541 
542       /* could have failed to create new level */
543       if (Prol11) {
544         /* get new block size of coarse matrices */
545         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
546 
547         if (pc_gamg->ops->optprolongator) {
548           /* smooth */
549           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
550         }
551 
552         if (pc_gamg->use_aggs_in_asm) {
553           PetscInt bs;
554           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
555           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
556         }
557 
558         Parr[level1] = Prol11;
559       } else Parr[level1] = NULL; /* failed to coarsen */
560 
561       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
562       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
563     } /* construct prolongator scope */
564 #if defined PETSC_GAMG_USE_LOG
565     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
566 #endif
567     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
568     if (!Parr[level1]) { /* failed to coarsen */
569       ierr =  PetscInfo1(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
570 #if defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES
571       ierr = PetscLogStagePop();CHKERRQ(ierr);
572 #endif
573       break;
574     }
575 #if defined PETSC_GAMG_USE_LOG
576     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
577 #endif
578     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is next M, a loop test variables */
579     if (is_last) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Is last ????????");
580     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
581     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
582     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
583 
584 #if defined PETSC_GAMG_USE_LOG
585     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
586 #endif
587     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is loop test variables */
588     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
589     nnztot += info.nz_used;
590     ierr = PetscInfo5(pc,"%d) N=%D, n data cols=%d, nnz/row (ave)=%d, %d active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
591 
592 #if (defined PETSC_GAMG_USE_LOG && defined GAMG_STAGES)
593     ierr = PetscLogStagePop();CHKERRQ(ierr);
594 #endif
595     /* stop if one node or one proc -- could pull back for singular problems */
596     if ( (pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2) ) {
597       ierr =  PetscInfo2(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
598       level++;
599       break;
600     }
601   } /* levels */
602   ierr                  = PetscFree(pc_gamg->data);CHKERRQ(ierr);
603 
604   ierr = PetscInfo2(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
605   pc_gamg->Nlevels = level + 1;
606   fine_level       = level;
607   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
608 
609   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
610     /* set default smoothers & set operators */
611     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
612       KSP smoother;
613       PC  subpc;
614 
615       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
616       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
617 
618       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
619       /* set ops */
620       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
621       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
622 
623       /* set defaults */
624       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
625 
626       /* set blocks for ASM smoother that uses the 'aggregates' */
627       if (pc_gamg->use_aggs_in_asm) {
628         PetscInt sz;
629         IS       *iss;
630 
631         sz   = nASMBlocksArr[level];
632         iss  = ASMLocalIDsArr[level];
633         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
634         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
635         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
636         if (!sz) {
637           IS       is;
638           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
639           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
640           ierr = ISDestroy(&is);CHKERRQ(ierr);
641         } else {
642           PetscInt kk;
643           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
644           for (kk=0; kk<sz; kk++) {
645             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
646           }
647           ierr = PetscFree(iss);CHKERRQ(ierr);
648         }
649         ASMLocalIDsArr[level] = NULL;
650         nASMBlocksArr[level]  = 0;
651       } else {
652         ierr = PCSetType(subpc, PCSOR);CHKERRQ(ierr);
653       }
654     }
655     {
656       /* coarse grid */
657       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
658       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
659       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
660       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
661       if (!pc_gamg->use_parallel_coarse_grid_solver) {
662         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
663         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
664         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
665         ierr = PCSetUp(subpc);CHKERRQ(ierr);
666         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
667         if (ii != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
668         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
669         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
670         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
671         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
672         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
673         /* This flag gets reset by PCBJacobiGetSubKSP(), but our BJacobi really does the same algorithm everywhere (and in
674          * fact, all but one process will have zero dofs), so we reset the flag to avoid having PCView_BJacobi attempt to
675          * view every subdomain as though they were different. */
676         ((PC_BJacobi*)subpc->data)->same_local_solves = PETSC_TRUE;
677       }
678     }
679 
680     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
681     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
682     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
683     ierr = PetscOptionsEnd();CHKERRQ(ierr);
684     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
685 
686     /* clean up */
687     for (level=1; level<pc_gamg->Nlevels; level++) {
688       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
689       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
690     }
691     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
692   } else {
693     KSP smoother;
694     ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
695     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
696     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
697     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
698     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
699   }
700   PetscFunctionReturn(0);
701 }
702 
703 /* ------------------------------------------------------------------------- */
704 /*
705  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
706    that was created with PCCreate_GAMG().
707 
708    Input Parameter:
709 .  pc - the preconditioner context
710 
711    Application Interface Routine: PCDestroy()
712 */
713 PetscErrorCode PCDestroy_GAMG(PC pc)
714 {
715   PetscErrorCode ierr;
716   PC_MG          *mg     = (PC_MG*)pc->data;
717   PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;
718 
719   PetscFunctionBegin;
720   ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
721   if (pc_gamg->ops->destroy) {
722     ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
723   }
724   ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
725   ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
726   ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
727   ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 /*@
732    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
733 
734    Logically Collective on PC
735 
736    Input Parameters:
737 +  pc - the preconditioner context
738 -  n - the number of equations
739 
740 
741    Options Database Key:
742 .  -pc_gamg_process_eq_limit <limit>
743 
744    Notes:
745     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
746           that has degrees of freedom
747 
748    Level: intermediate
749 
750 .seealso: PCGAMGSetCoarseEqLim()
751 @*/
PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetProcEqLim_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
761 
762 static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
763 {
764   PC_MG   *mg      = (PC_MG*)pc->data;
765   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
766 
767   PetscFunctionBegin;
768   if (n>0) pc_gamg->min_eq_proc = n;
769   PetscFunctionReturn(0);
770 }
771 
772 /*@
773    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
774 
775  Collective on PC
776 
777    Input Parameters:
778 +  pc - the preconditioner context
779 -  n - maximum number of equations to aim for
780 
781    Options Database Key:
782 .  -pc_gamg_coarse_eq_limit <limit>
783 
784    Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
785      has less than 1000 unknowns.
786 
787    Level: intermediate
788 
789 .seealso: PCGAMGSetProcEqLim()
790 @*/
PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetCoarseEqLim_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
800 
801 static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
802 {
803   PC_MG   *mg      = (PC_MG*)pc->data;
804   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
805 
806   PetscFunctionBegin;
807   if (n>0) pc_gamg->coarse_eq_limit = n;
808   PetscFunctionReturn(0);
809 }
810 
811 /*@
812    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
813 
814    Collective on PC
815 
816    Input Parameters:
817 +  pc - the preconditioner context
818 -  n - PETSC_TRUE or PETSC_FALSE
819 
820    Options Database Key:
821 .  -pc_gamg_repartition <true,false>
822 
823    Notes:
    this will generally improve the load balancing of the work on each level
825 
826    Level: intermediate
827 
828 .seealso: ()
829 @*/
PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetRepartition_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
839 
840 static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
841 {
842   PC_MG   *mg      = (PC_MG*)pc->data;
843   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
844 
845   PetscFunctionBegin;
846   pc_gamg->repart = n;
847   PetscFunctionReturn(0);
848 }
849 
850 /*@
851    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
852 
853    Collective on PC
854 
855    Input Parameters:
856 +  pc - the preconditioner context
857 -  n - PETSC_TRUE or PETSC_FALSE
858 
859    Options Database Key:
860 .  -pc_gamg_reuse_interpolation <true,false>
861 
862    Level: intermediate
863 
864    Notes:
865     this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
866           rebuilding the preconditioner quicker.
867 
868 .seealso: ()
869 @*/
PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetReuseInterpolation_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
879 
880 static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
881 {
882   PC_MG   *mg      = (PC_MG*)pc->data;
883   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
884 
885   PetscFunctionBegin;
886   pc_gamg->reuse_prol = n;
887   PetscFunctionReturn(0);
888 }
889 
890 /*@
891    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
892 
893    Collective on PC
894 
895    Input Parameters:
896 +  pc - the preconditioner context
897 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
898 
899    Options Database Key:
900 .  -pc_gamg_asm_use_agg
901 
902    Level: intermediate
903 
904 .seealso: ()
905 @*/
PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGASMSetUseAggs_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
915 
916 static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
917 {
918   PC_MG   *mg      = (PC_MG*)pc->data;
919   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
920 
921   PetscFunctionBegin;
922   pc_gamg->use_aggs_in_asm = flg;
923   PetscFunctionReturn(0);
924 }
925 
926 /*@
927    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
928 
929    Collective on PC
930 
931    Input Parameters:
932 +  pc - the preconditioner context
933 -  flg - PETSC_TRUE to not force coarse grid onto one processor
934 
935    Options Database Key:
936 .  -pc_gamg_use_parallel_coarse_grid_solver
937 
938    Level: intermediate
939 
940 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
941 @*/
PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetUseParallelCoarseGridSolve_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
951 
952 static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
953 {
954   PC_MG   *mg      = (PC_MG*)pc->data;
955   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
956 
957   PetscFunctionBegin;
958   pc_gamg->use_parallel_coarse_grid_solver = flg;
959   PetscFunctionReturn(0);
960 }
961 
962 /*@
963    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
964 
965    Collective on PC
966 
967    Input Parameters:
968 +  pc - the preconditioner context
969 -  flg - PETSC_TRUE to pin coarse grids to CPU
970 
971    Options Database Key:
972 .  -pc_gamg_cpu_pin_coarse_grids
973 
974    Level: intermediate
975 
976 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
977 @*/
PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetCpuPinCoarseGrids_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
987 
988 static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
989 {
990   PC_MG   *mg      = (PC_MG*)pc->data;
991   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
992 
993   PetscFunctionBegin;
994   pc_gamg->cpu_pin_coarse_grids = flg;
995   PetscFunctionReturn(0);
996 }
997 
998 /*@
   PCGAMGSetCoarseGridLayoutType - place reduced grids on processors with natural order (compact type)
1000 
1001    Collective on PC
1002 
1003    Input Parameters:
1004 +  pc - the preconditioner context
1005 -  flg - Layout type
1006 
1007    Options Database Key:
1008 .  -pc_gamg_coarse_grid_layout_type
1009 
1010    Level: intermediate
1011 
1012 .seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
1013 @*/
PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetCoarseGridLayoutType_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1023 
1024 static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
1025 {
1026   PC_MG   *mg      = (PC_MG*)pc->data;
1027   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1028 
1029   PetscFunctionBegin;
1030   pc_gamg->layout_type = flg;
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 /*@
1035    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1036 
1037    Not collective on PC
1038 
1039    Input Parameters:
1040 +  pc - the preconditioner
1041 -  n - the maximum number of levels to use
1042 
1043    Options Database Key:
1044 .  -pc_mg_levels
1045 
1046    Level: intermediate
1047 
1048 .seealso: ()
1049 @*/
PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetNlevels_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1059 
1060 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1061 {
1062   PC_MG   *mg      = (PC_MG*)pc->data;
1063   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1064 
1065   PetscFunctionBegin;
1066   pc_gamg->Nlevels = n;
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 /*@
1071    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1072 
1073    Not collective on PC
1074 
1075    Input Parameters:
+  pc - the preconditioner context
.  v - array of threshold values, one per level; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
-  n - the number of threshold values provided in v
1078 
1079    Options Database Key:
1080 .  -pc_gamg_threshold <threshold>
1081 
1082    Notes:
    Increasing the threshold decreases the rate of coarsening. Conversely reducing the threshold increases the rate of coarsening (aggressive coarsening) and thereby reduces the complexity of the coarse grids, and generally results in slower solver convergence rates. Reducing coarse grid complexity reduces the complexity of Galerkin coarse grid construction considerably.
    Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, thereby reducing the coupling in the graph and producing a different (perhaps better) coarse set of points.
1085 
1086    Level: intermediate
1087 
1088 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1089 @*/
PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* v holds n per-level thresholds; dispatches to PCGAMGSetThreshold_GAMG() if pc is a PCGAMG (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1099 
1100 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1101 {
1102   PC_MG   *mg      = (PC_MG*)pc->data;
1103   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1104   PetscInt i;
1105   PetscFunctionBegin;
1106   for (i=0;i<n;i++) pc_gamg->threshold[i] = v[i];
1107   do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_GAMG_MAXLEVELS);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 /*@
1112    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1113 
1114    Not collective on PC
1115 
1116    Input Parameters:
1117 +  pc - the preconditioner context
-  scale - the threshold value reduction, usually < 1.0
1119 
1120    Options Database Key:
1121 .  -pc_gamg_threshold_scale <v>
1122 
1123    Level: advanced
1124 
1125 .seealso: ()
1126 @*/
PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetThresholdScale_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1136 
1137 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1138 {
1139   PC_MG   *mg      = (PC_MG*)pc->data;
1140   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1141   PetscFunctionBegin;
1142   pc_gamg->threshold_scale = v;
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 /*@C
1147    PCGAMGSetType - Set solution method
1148 
1149    Collective on PC
1150 
1151    Input Parameters:
1152 +  pc - the preconditioner context
1153 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1154 
1155    Options Database Key:
1156 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1157 
1158    Level: intermediate
1159 
1160 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1161 @*/
PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatches to PCGAMGSetType_GAMG() if pc is a PCGAMG; silently a no-op otherwise (PetscTryMethod) */
  ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1171 
1172 /*@C
1173    PCGAMGGetType - Get solution method
1174 
1175    Collective on PC
1176 
1177    Input Parameter:
1178 .  pc - the preconditioner context
1179 
1180    Output Parameter:
1181 .  type - the type of algorithm used
1182 
1183    Level: intermediate
1184 
1185 .seealso: PCGAMGSetType(), PCGAMGType
1186 @*/
PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod (not Try): errors if pc is not a PCGAMG, since *type could not be filled in */
  ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1196 
1197 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1198 {
1199   PC_MG          *mg      = (PC_MG*)pc->data;
1200   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1201 
1202   PetscFunctionBegin;
1203   *type = pc_gamg->type;
1204   PetscFunctionReturn(0);
1205 }
1206 
/*
   PCGAMGSetType_GAMG - implementation behind PCGAMGSetType(): looks up the requested
   subtype's constructor in GAMGList, tears down any previously active subtype, and
   runs the new constructor.  The destroy/memzero/re-run order below is deliberate.
*/
static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
{
  PetscErrorCode ierr,(*r)(PC);
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->type = type;
  /* validate the type name before destroying the current subtype */
  ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
  if (pc_gamg->ops->destroy) {
    ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
    /* zero the ops table and restore the one entry the base class always provides */
    ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
    pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
    /* cleaning up common data in pc_gamg - this should disappear someday */
    pc_gamg->data_cell_cols = 0;
    pc_gamg->data_cell_rows = 0;
    pc_gamg->orig_data_cell_cols = 0;
    pc_gamg->orig_data_cell_rows = 0;
    ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
    pc_gamg->data_sz = 0;
  }
  ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
  /* run the subtype constructor to populate pc_gamg->ops etc. */
  ierr = (*r)(pc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1234 
1235 /* -------------------------------------------------------------------------- */
1236 /*
1237    PCMGGetGridComplexity - compute coarse grid complexity of MG hierarchy
1238 
1239    Input Parameter:
1240 .  pc - the preconditioner context
1241 
1242    Output Parameter:
1243 .  gc - grid complexity = sum_i(nnz_i) / nnz_0
1244 
1245    Level: advanced
1246 */
1247 static PetscErrorCode PCMGGetGridComplexity(PC pc, PetscReal *gc)
1248 {
1249   PetscErrorCode ierr;
1250   PC_MG          *mg      = (PC_MG*)pc->data;
1251   PC_MG_Levels   **mglevels = mg->levels;
1252   PetscInt       lev;
1253   PetscLogDouble nnz0 = 0, sgc = 0;
1254   MatInfo        info;
1255 
1256   PetscFunctionBegin;
1257   if (!mg->nlevels) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MG has no levels");
1258   for (lev=0; lev<mg->nlevels; lev++) {
1259     Mat dB;
1260     ierr = KSPGetOperators(mglevels[lev]->smoothd,NULL,&dB);CHKERRQ(ierr);
1261     ierr = MatGetInfo(dB,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
1262     sgc += info.nz_used;
1263     if (lev==mg->nlevels-1) nnz0 = info.nz_used;
1264   }
1265   if (nnz0 > 0) *gc = (PetscReal)(sgc/nnz0);
1266   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number for grid points on finest level is not available");
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1271 {
1272   PetscErrorCode ierr,i;
1273   PC_MG          *mg      = (PC_MG*)pc->data;
1274   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1275   PetscReal       gc=0;
1276   PetscFunctionBegin;
1277   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1278   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1279   for (i=0;i<pc_gamg->current_level;i++) {
1280     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1281   }
1282   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1283   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1284   if (pc_gamg->use_aggs_in_asm) {
1285     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1286   }
1287   if (pc_gamg->use_parallel_coarse_grid_solver) {
1288     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1289   }
1290 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1291   if (pc_gamg->cpu_pin_coarse_grids) {
1292     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1293   }
1294 #endif
1295   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1296   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1297   /* } else { */
1298   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1299   /* } */
1300   if (pc_gamg->ops->view) {
1301     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1302   }
1303   ierr = PCMGGetGridComplexity(pc,&gc);CHKERRQ(ierr);
1304   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g\n",gc);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1309 {
1310   PetscErrorCode ierr;
1311   PC_MG          *mg      = (PC_MG*)pc->data;
1312   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1313   PetscBool      flag;
1314   MPI_Comm       comm;
1315   char           prefix[256];
1316   PetscInt       i,n;
1317   const char     *pcpre;
1318   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",0};
1319   PetscFunctionBegin;
1320   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1321   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1322   {
1323     char tname[256];
1324     ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1325     if (flag) {
1326       ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1327     }
1328     ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1329     ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1330     ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1331     ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1332     ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1333     ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1334     ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1335     ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1336     ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1337     n = PETSC_GAMG_MAXLEVELS;
1338     ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1339     if (!flag || n < PETSC_GAMG_MAXLEVELS) {
1340       if (!flag) n = 1;
1341       i = n;
1342       do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_GAMG_MAXLEVELS);
1343     }
1344     ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1345 
1346     /* set options for subtype */
1347     if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1348   }
1349   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1350   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1351   ierr = PetscOptionsTail();CHKERRQ(ierr);
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 /* -------------------------------------------------------------------------- */
1356 /*MC
1357      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1358 
1359    Options Database Keys:
1360 +   -pc_gamg_type <type> - one of agg, geo, or classical
.   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
1362 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
.   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1364 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1365                                         equations on each process that has degrees of freedom
1366 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1367 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1368 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1369 
1370    Options Database Keys for default Aggregation:
1371 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1372 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1373 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1374 
1375    Multigrid options:
1376 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1377 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
1378 .  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kascade
1379 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1380 
1381 
1382   Notes:
1383     In order to obtain good performance for PCGAMG for vector valued problems you must
1384        Call MatSetBlockSize() to indicate the number of degrees of freedom per grid point
1385        Call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator
1386        See the Users Manual Chapter 4 for more details
1387 
1388   Level: intermediate
1389 
1390 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1391            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation()
1392 M*/
1393 
1394 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1395 {
1396   PetscErrorCode ierr,i;
1397   PC_GAMG        *pc_gamg;
1398   PC_MG          *mg;
1399 
1400   PetscFunctionBegin;
1401    /* register AMG type */
1402   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1403 
1404   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1405   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1406   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1407 
1408   /* create a supporting struct and attach it to pc */
1409   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1410   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1411   mg           = (PC_MG*)pc->data;
1412   mg->innerctx = pc_gamg;
1413 
1414   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1415 
1416   pc_gamg->setup_count = 0;
1417   /* these should be in subctx but repartitioning needs simple arrays */
1418   pc_gamg->data_sz = 0;
1419   pc_gamg->data    = 0;
1420 
1421   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1422   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1423   pc->ops->setup          = PCSetUp_GAMG;
1424   pc->ops->reset          = PCReset_GAMG;
1425   pc->ops->destroy        = PCDestroy_GAMG;
1426   mg->view                = PCView_GAMG;
1427 
1428   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1429   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1430   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1431   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1432   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1433   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1443   pc_gamg->repart           = PETSC_FALSE;
1444   pc_gamg->reuse_prol       = PETSC_FALSE;
1445   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1446   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1447   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1448   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1449   pc_gamg->min_eq_proc      = 50;
1450   pc_gamg->coarse_eq_limit  = 50;
1451   for (i=0;i<PETSC_GAMG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1452   pc_gamg->threshold_scale = 1.;
1453   pc_gamg->Nlevels          = PETSC_GAMG_MAXLEVELS;
1454   pc_gamg->current_level    = 0; /* don't need to init really */
1455   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1456 
1457   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1458   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 /*@C
1463  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1464     from PCInitializePackage().
1465 
1466  Level: developer
1467 
 .seealso: PetscInitialize(), PCGAMGFinalizePackage()
1469 @*/
1470 PetscErrorCode PCGAMGInitializePackage(void)
1471 {
1472   PetscErrorCode ierr;
1473 
1474   PetscFunctionBegin;
1475   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1476   PCGAMGPackageInitialized = PETSC_TRUE;
1477   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1478   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1479   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1480   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1481 
1482   /* general events */
1483   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", 0, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1484   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1485   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1486   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1487   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1488   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1489   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1490 
1491 #if defined PETSC_GAMG_USE_LOG
1492   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1493   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1494   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1495   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1496   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1497   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1498   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1499   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1500   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1501   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1502   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1503   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1504   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1505   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1506   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1507   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1508   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1509 
1510   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1511   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1512   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1513   /* create timer stages */
1514 #if defined GAMG_STAGES
1515   {
1516     char     str[32];
1517     PetscInt lidx;
1518     sprintf(str,"MG Level %d (finest)",0);
1519     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1520     for (lidx=1; lidx<9; lidx++) {
1521       sprintf(str,"MG Level %d",lidx);
1522       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1523     }
1524   }
1525 #endif
1526 #endif
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 /*@C
1531  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1532     called from PetscFinalize() automatically.
1533 
1534  Level: developer
1535 
 .seealso: PetscFinalize(), PCGAMGInitializePackage()
1537 @*/
1538 PetscErrorCode PCGAMGFinalizePackage(void)
1539 {
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   PCGAMGPackageInitialized = PETSC_FALSE;
1544   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 /*@C
1549  PCGAMGRegister - Register a PCGAMG implementation.
1550 
1551  Input Parameters:
1552  + type - string that will be used as the name of the GAMG type.
 - create - function for creating the GAMG context.
1554 
1555   Level: advanced
1556 
1557  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1558 @*/
1559 PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
1560 {
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1565   ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568 
1569