xref: /petsc/src/ksp/pc/impls/gamg/agg.c (revision ff1a2e19e92ca3eb486258317b2f290407e2b2d1)
1 /*
2  GAMG geometric-algebric multiogrid PC - Mark Adams 2011
3  */
4 
5 #include <../src/ksp/pc/impls/gamg/gamg.h>        /*I "petscpc.h" I*/
6 #include <private/kspimpl.h>
7 
8 #include <assert.h>
9 #include <petscblaslapack.h>
10 
11 typedef struct {
12   PetscInt nsmooths;
13   Mat aux_mat;
14   PetscBool sym_graph;
15 }PC_GAMG_AGG;
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "PCGAMGSetNSmooths"
19 /*@
20    PCGAMGSetNSmooths - Set number of smoothing steps (1 is typical)
21 
22    Not Collective on PC
23 
24    Input Parameters:
25 .  pc - the preconditioner context
26 
27    Options Database Key:
28 .  -pc_gamg_agg_nsmooths
29 
30    Level: intermediate
31 
32    Concepts: Aggregation AMG preconditioner
33 
34 .seealso: ()
35 @*/
36 PetscErrorCode PCGAMGSetNSmooths(PC pc, PetscInt n)
37 {
38   PetscErrorCode ierr;
39 
40   PetscFunctionBegin;
41   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
42   ierr = PetscTryMethod(pc,"PCGAMGSetNSmooths_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
43   PetscFunctionReturn(0);
44 }
45 
46 EXTERN_C_BEGIN
47 #undef __FUNCT__
48 #define __FUNCT__ "PCGAMGSetNSmooths_GAMG"
49 PetscErrorCode PCGAMGSetNSmooths_GAMG(PC pc, PetscInt n)
50 {
51   PC_MG           *mg = (PC_MG*)pc->data;
52   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
53   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
54 
55   PetscFunctionBegin;
56   pc_gamg_agg->nsmooths = n;
57   PetscFunctionReturn(0);
58 }
59 EXTERN_C_END
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "PCGAMGSetSymGraph"
63 /*@
64    PCGAMGSetSymGraph -
65 
66    Not Collective on PC
67 
68    Input Parameters:
69 .  pc - the preconditioner context
70 
71    Options Database Key:
72 .  -pc_gamg_sym_graph
73 
74    Level: intermediate
75 
76    Concepts: Aggregation AMG preconditioner
77 
78 .seealso: ()
79 @*/
80 PetscErrorCode PCGAMGSetSymGraph(PC pc, PetscBool n)
81 {
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
86   ierr = PetscTryMethod(pc,"PCGAMGSetSymGraph_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 EXTERN_C_BEGIN
91 #undef __FUNCT__
92 #define __FUNCT__ "PCGAMGSetSymGraph_GAMG"
93 PetscErrorCode PCGAMGSetSymGraph_GAMG(PC pc, PetscBool n)
94 {
95   PC_MG           *mg = (PC_MG*)pc->data;
96   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
97   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
98 
99   PetscFunctionBegin;
100   pc_gamg_agg->sym_graph = n;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 /* -------------------------------------------------------------------------- */
106 /*
107    PCSetFromOptions_GAMG_AGG
108 
109   Input Parameter:
110    . pc -
111 */
112 #undef __FUNCT__
113 #define __FUNCT__ "PCSetFromOptions_GAMG_AGG"
114 PetscErrorCode PCSetFromOptions_GAMG_AGG( PC pc )
115 {
116   PetscErrorCode  ierr;
117   PC_MG           *mg = (PC_MG*)pc->data;
118   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
119   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
120   PetscBool        flag;
121 
122   PetscFunctionBegin;
123   /* call base class */
124   ierr = PCSetFromOptions_GAMG( pc ); CHKERRQ(ierr);
125 
126   ierr = PetscOptionsHead("GAMG-AGG options"); CHKERRQ(ierr);
127   {
128     /* -pc_gamg_agg_nsmooths */
129     pc_gamg_agg->nsmooths = 0;
130     ierr = PetscOptionsInt("-pc_gamg_agg_nsmooths",
131                            "smoothing steps for smoothed aggregation, usually 1 (0)",
132                            "PCGAMGSetNSmooths",
133                            pc_gamg_agg->nsmooths,
134                            &pc_gamg_agg->nsmooths,
135                            &flag);
136     CHKERRQ(ierr);
137 
138     /* -pc_gamg_sym_graph */
139     pc_gamg_agg->sym_graph = PETSC_FALSE;
140     ierr = PetscOptionsBool("-pc_gamg_sym_graph",
141                             "Set for asymetric matrices",
142                             "PCGAMGSetSymGraph",
143                             pc_gamg_agg->sym_graph,
144                             &pc_gamg_agg->sym_graph,
145                             &flag);
146     CHKERRQ(ierr);
147   }
148   ierr = PetscOptionsTail();CHKERRQ(ierr);
149 
150   PetscFunctionReturn(0);
151 }
152 
153 /* -------------------------------------------------------------------------- */
154 /*
155    PCDestroy_AGG
156 
157   Input Parameter:
158    . pc -
159 */
160 #undef __FUNCT__
161 #define __FUNCT__ "PCDestroy_AGG"
162 PetscErrorCode PCDestroy_AGG( PC pc )
163 {
164   PetscErrorCode  ierr;
165   PC_MG           *mg = (PC_MG*)pc->data;
166   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
167   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
168 
169   PetscFunctionBegin;
170   if( pc_gamg_agg ) {
171     ierr = PetscFree(pc_gamg_agg);CHKERRQ(ierr);
172     pc_gamg_agg = 0;
173   }
174 
175   /* call base class */
176   ierr = PCDestroy_GAMG( pc );CHKERRQ(ierr);
177 
178   PetscFunctionReturn(0);
179 }
180 
181 /* -------------------------------------------------------------------------- */
182 /*
183    PCSetCoordinates_AGG
184 
185    Input Parameter:
186    .  pc - the preconditioner context
187 */
188 EXTERN_C_BEGIN
189 #undef __FUNCT__
190 #define __FUNCT__ "PCSetCoordinates_AGG"
191 PetscErrorCode PCSetCoordinates_AGG( PC pc, PetscInt ndm, PetscReal *coords )
192 {
193   PC_MG          *mg = (PC_MG*)pc->data;
194   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
195   PetscErrorCode ierr;
196   PetscInt       arrsz,bs,my0,kk,ii,jj,nloc,Iend;
197   Mat            Amat = pc->pmat;
198 
199   PetscFunctionBegin;
200   PetscValidHeaderSpecific( Amat, MAT_CLASSID, 1 );
201   ierr  = MatGetBlockSize( Amat, &bs );               CHKERRQ( ierr );
202   ierr  = MatGetOwnershipRange( Amat, &my0, &Iend ); CHKERRQ(ierr);
203   nloc = (Iend-my0)/bs;
204   if((Iend-my0)%bs!=0) SETERRQ1(((PetscObject)Amat)->comm,PETSC_ERR_ARG_WRONG, "Bad local size %d.",nloc);
205 
206   /* SA: null space vectors */
207   if( coords && bs==1 ) pc_gamg->data_cell_cols = 1; /* scalar w/ coords and SA (not needed) */
208   else if( coords ) pc_gamg->data_cell_cols = (ndm==2 ? 3 : 6); /* elasticity */
209   else pc_gamg->data_cell_cols = bs; /* no data, force SA with constant null space vectors */
210   pc_gamg->data_cell_rows = bs;
211 
212   arrsz = nloc*pc_gamg->data_cell_rows*pc_gamg->data_cell_cols;
213 
214   /* create data - syntactic sugar that should be refactored at some point */
215   if (pc_gamg->data==0 || (pc_gamg->data_sz != arrsz)) {
216     ierr = PetscFree( pc_gamg->data );  CHKERRQ(ierr);
217     ierr = PetscMalloc(arrsz*sizeof(PetscReal), &pc_gamg->data ); CHKERRQ(ierr);
218   }
219   /* copy data in - column oriented */
220   for(kk=0;kk<nloc;kk++){
221     const PetscInt M = Iend - my0;
222     PetscReal *data = &pc_gamg->data[kk*bs];
223     if( pc_gamg->data_cell_cols==1 ) *data = 1.0;
224     else {
225       for(ii=0;ii<bs;ii++)
226         for(jj=0;jj<bs;jj++)
227           if(ii==jj)data[ii*M + jj] = 1.0; /* translational modes */
228           else data[ii*M + jj] = 0.0;
229       if( coords ) {
230         if( ndm == 2 ){ /* rotational modes */
231           data += 2*M;
232           data[0] = -coords[2*kk+1];
233           data[1] =  coords[2*kk];
234         }
235         else {
236           data += 3*M;
237           data[0] = 0.0;               data[M+0] =  coords[3*kk+2]; data[2*M+0] = -coords[3*kk+1];
238           data[1] = -coords[3*kk+2]; data[M+1] = 0.0;               data[2*M+1] =  coords[3*kk];
239           data[2] =  coords[3*kk+1]; data[M+2] = -coords[3*kk];   data[2*M+2] = 0.0;
240         }
241       }
242     }
243   }
244 
245   pc_gamg->data_sz = arrsz;
246 
247   PetscFunctionReturn(0);
248 }
249 EXTERN_C_END
250 
251 typedef PetscInt NState;
252 static const NState NOT_DONE=-2;
253 static const NState DELETED=-1;
254 static const NState REMOVED=-3;
255 #define IS_SELECTED(s) (s!=DELETED && s!=NOT_DONE && s!=REMOVED)
256 
257 /* -------------------------------------------------------------------------- */
258 /*
259    smoothAggs - greedy grab of with G1 (unsquared graph) -- AIJ specific
260      - AGG-MG specific: clears singletons out of 'selected_2'
261 
262    Input Parameter:
263    . Gmat_2 - glabal matrix of graph (data not defined)
264    . Gmat_1 - base graph to grab with
265    . selected_2 -
266    Input/Output Parameter:
267    . llist_aggs_2 - linked list of aggs, ghost lids are based on Gmat_2 (squared graph)
268 */
269 #undef __FUNCT__
270 #define __FUNCT__ "smoothAggs"
271 PetscErrorCode smoothAggs( const Mat Gmat_2, /* base (squared) graph */
272                            const Mat Gmat_1, /* base graph, could be unsymmetic */
273                            const IS selected_2, /* [nselected total] selected vertices */
274                            IS llist_aggs_2 /* [nloc_nghost] global ID of aggregate */
275                            )
276 {
277   PetscErrorCode ierr;
278   PetscBool      isMPI;
279   Mat_SeqAIJ    *matA_1, *matB_1=0, *matA_2, *matB_2=0;
280   MPI_Comm       wcomm = ((PetscObject)Gmat_2)->comm;
281   PetscMPIInt    mype;
282   PetscInt       lid,*ii,*idx,ix,Iend,my0,nnodes_2,kk,n,j;
283   Mat_MPIAIJ    *mpimat_2 = 0, *mpimat_1=0;
284   const PetscInt nloc = Gmat_2->rmap->n;
285   PetscScalar   *cpcol_1_state,*cpcol_2_state,*deleted_parent_gid;
286   PetscInt      *lid_cprowID_1,*id_llist_2,*lid_cprowID_2;
287   NState        *lid_state;
288 
289   PetscFunctionBegin;
290   ierr = MPI_Comm_rank( wcomm, &mype );   CHKERRQ(ierr);
291   ierr = MatGetOwnershipRange(Gmat_1,&my0,&Iend);  CHKERRQ(ierr);
292 
293   if( !PETSC_TRUE ) {
294     PetscViewer viewer; char fname[32]; static int llev=0;
295     sprintf(fname,"Gmat2_%d.m",llev++);
296     PetscViewerASCIIOpen(wcomm,fname,&viewer);
297     ierr = PetscViewerSetFormat( viewer, PETSC_VIEWER_ASCII_MATLAB);  CHKERRQ(ierr);
298     ierr = MatView(Gmat_2, viewer ); CHKERRQ(ierr);
299     ierr = PetscViewerDestroy( &viewer );
300   }
301 
302   { /* copy linked list into temp buffer - should not work directly on pointer */
303     const PetscInt *llist_idx;
304     ierr = ISGetSize( llist_aggs_2, &nnodes_2 );        CHKERRQ(ierr);
305     ierr = PetscMalloc( nnodes_2*sizeof(PetscInt), &id_llist_2 ); CHKERRQ(ierr);
306     ierr = ISGetIndices( llist_aggs_2, &llist_idx );       CHKERRQ(ierr);
307     for(lid=0;lid<nnodes_2;lid++) id_llist_2[lid] = llist_idx[lid];
308     ierr = ISRestoreIndices( llist_aggs_2, &llist_idx );     CHKERRQ(ierr);
309   }
310 
311   /* get submatrices */
312   ierr = PetscTypeCompare( (PetscObject)Gmat_1, MATMPIAIJ, &isMPI ); CHKERRQ(ierr);
313   if(isMPI) {
314     /* grab matrix objects */
315     mpimat_2 = (Mat_MPIAIJ*)Gmat_2->data;
316     mpimat_1 = (Mat_MPIAIJ*)Gmat_1->data;
317     matA_1 = (Mat_SeqAIJ*)mpimat_1->A->data;
318     matB_1 = (Mat_SeqAIJ*)mpimat_1->B->data;
319     matA_2 = (Mat_SeqAIJ*)mpimat_2->A->data;
320     matB_2 = (Mat_SeqAIJ*)mpimat_2->B->data;
321 
322     /* force compressed row storage for B matrix in AuxMat */
323     matB_1->compressedrow.check = PETSC_TRUE;
324     ierr = MatCheckCompressedRow(mpimat_1->B,&matB_1->compressedrow,matB_1->i,Gmat_1->rmap->n,-1.0);
325     CHKERRQ(ierr);
326 
327     ierr = PetscMalloc( nloc*sizeof(PetscInt), &lid_cprowID_2 ); CHKERRQ(ierr);
328     ierr = PetscMalloc( nloc*sizeof(PetscInt), &lid_cprowID_1 ); CHKERRQ(ierr);
329     for(lid=0;lid<nloc;lid++) lid_cprowID_1[lid] = lid_cprowID_2[lid] = -1;
330     for (ix=0; ix<matB_1->compressedrow.nrows; ix++) {
331       PetscInt lid = matB_1->compressedrow.rindex[ix];
332       lid_cprowID_1[lid] = ix;
333     }
334     for (ix=0; ix<matB_2->compressedrow.nrows; ix++) {
335       PetscInt lid = matB_2->compressedrow.rindex[ix];
336       lid_cprowID_2[lid] = ix;
337     }
338   }
339   else {
340     matA_1 = (Mat_SeqAIJ*)Gmat_1->data;
341     matA_2 = (Mat_SeqAIJ*)Gmat_2->data;
342     lid_cprowID_2 = lid_cprowID_1 = 0;
343   }
344   assert( matA_1 && !matA_1->compressedrow.use );
345   assert( matB_1==0 || matB_1->compressedrow.use );
346   assert( matA_2 && !matA_2->compressedrow.use );
347   assert( matB_2==0 || matB_2->compressedrow.use );
348 
349   /* get state of locals and selected gid for deleted */
350   ierr = PetscMalloc( nloc*sizeof(NState), &lid_state ); CHKERRQ(ierr);
351   ierr = PetscMalloc( nloc*sizeof(PetscScalar), &deleted_parent_gid ); CHKERRQ(ierr);
352   for( lid = 0 ; lid < nloc ; lid++ ) {
353     deleted_parent_gid[lid] = -1.0;
354     lid_state[lid] = DELETED;
355   }
356   /* set index into compressed row 'lid_cprowID', not -1 means its a boundary node */
357   {
358     PetscInt nSelected;
359     const PetscInt *selected_idx;
360     /* set local selected */
361     ierr = ISGetSize( selected_2, &nSelected );        CHKERRQ(ierr);
362     ierr = ISGetIndices( selected_2, &selected_idx );       CHKERRQ(ierr);
363     for(kk=0;kk<nSelected;kk++){
364       PetscInt lid = selected_idx[kk];
365       if(lid<nloc) lid_state[lid] = (NState)(lid+my0); /* selected flag */
366       else break;
367       /* remove singletons */
368       ii = matA_2->i; n = ii[lid+1] - ii[lid];
369       if( n < 2 ) {
370         if(!lid_cprowID_2 || (ix=lid_cprowID_2[lid])==-1 || (matB_2->compressedrow.i[ix+1]-matB_2->compressedrow.i[ix])==0){
371           lid_state[lid] = REMOVED;
372         }
373       }
374     }
375     ierr = ISRestoreIndices( selected_2, &selected_idx );     CHKERRQ(ierr);
376   }
377   /* map local to selected local, -1 means a ghost owns it */
378   for(lid=kk=0;lid<nloc;lid++){
379     NState state = lid_state[lid];
380     if( IS_SELECTED(state) ){
381       PetscInt flid = lid;
382       do{
383         if(flid<nloc){
384           deleted_parent_gid[flid] = (PetscScalar)(lid + my0);
385         }
386         kk++;
387       } while( (flid=id_llist_2[flid]) != -1 );
388     }
389   }
390   /* get 'cpcol_1_state', 'cpcol_2_state' - uses mpimat_1->lvec & mpimat_2->lvec for temp space */
391   if (isMPI) {
392     Vec          tempVec;
393 
394     /* get 'cpcol_1_state' */
395     ierr = MatGetVecs( Gmat_1, &tempVec, 0 );         CHKERRQ(ierr);
396     for(kk=0,j=my0;kk<nloc;kk++,j++){
397       PetscScalar v = (PetscScalar)lid_state[kk];
398       ierr = VecSetValues( tempVec, 1, &j, &v, INSERT_VALUES );  CHKERRQ(ierr);
399     }
400     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
401     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
402     ierr = VecScatterBegin(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD);
403     CHKERRQ(ierr);
404     ierr =   VecScatterEnd(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD);
405     CHKERRQ(ierr);
406     ierr = VecGetArray( mpimat_1->lvec, &cpcol_1_state ); CHKERRQ(ierr);
407     ierr = VecDestroy( &tempVec ); CHKERRQ(ierr);
408 
409     /* get 'cpcol_2_state' */
410     ierr = MatGetVecs( Gmat_2, &tempVec, 0 );         CHKERRQ(ierr);
411     for(kk=0,j=my0;kk<nloc;kk++,j++){
412       PetscScalar v = (PetscScalar)lid_state[kk];
413       ierr = VecSetValues( tempVec, 1, &j, &v, INSERT_VALUES );  CHKERRQ(ierr);
414     }
415     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
416     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
417     ierr = VecScatterBegin(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
418     CHKERRQ(ierr);
419     ierr =   VecScatterEnd(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
420     CHKERRQ(ierr);
421     ierr = VecGetArray( mpimat_2->lvec, &cpcol_2_state ); CHKERRQ(ierr);
422     ierr = VecDestroy( &tempVec ); CHKERRQ(ierr);
423   } /* ismpi */
424 
425   /* doit */
426   for(lid=0;lid<nloc;lid++){
427     NState state = lid_state[lid];
428     if( IS_SELECTED(state) ) {      /* steal locals */
429       ii = matA_1->i; n = ii[lid+1] - ii[lid];
430       idx = matA_1->j + ii[lid];
431       for (j=0; j<n; j++) {
432         PetscInt flid, lidj = idx[j], sgid;
433         NState statej = lid_state[lidj];
434         if (statej==DELETED && (sgid=(PetscInt)PetscRealPart(deleted_parent_gid[lidj])) != lid+my0) { /* steal local */
435           deleted_parent_gid[lidj] = (PetscScalar)(lid+my0); /* send this with _2 */
436           if( sgid >= my0 && sgid < my0+nloc ){       /* I'm stealing this local from a local */
437             PetscInt hav=0, flid2=sgid-my0, lastid;
438             /* looking for local from local so id_llist_2 works */
439             for( lastid=flid2, flid=id_llist_2[flid2] ; flid!=-1 ; flid=id_llist_2[flid] ) {
440               if( flid == lidj ) {
441                 id_llist_2[lastid] = id_llist_2[flid];                    /* remove lidj from list */
442                 id_llist_2[flid] = id_llist_2[lid]; id_llist_2[lid] = flid; /* insert 'lidj' into head of llist */
443                 hav++;
444                 break;
445               }
446               lastid = flid;
447             }
448             if(hav!=1){
449               if(hav==0)SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
450               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"found node %d times???",hav);
451             }
452           }
453           else{            /* I'm stealing this local, owned by a ghost - ok to use _2, local */
454             assert(sgid==-1);
455             id_llist_2[lidj] = id_llist_2[lid]; id_llist_2[lid] = lidj; /* insert 'lidj' into head of llist */
456             /* local remove at end, off add/rm at end */
457           }
458         }
459       }
460     }
461     else if( state == DELETED && lid_cprowID_1 ) {
462       PetscInt sgidold = (PetscInt)PetscRealPart(deleted_parent_gid[lid]);
463       /* see if I have a selected ghost neighbor that will steal me */
464       if( (ix=lid_cprowID_1[lid]) != -1 ){
465         ii = matB_1->compressedrow.i; n = ii[ix+1] - ii[ix];
466         idx = matB_1->j + ii[ix];
467         for( j=0 ; j<n ; j++ ) {
468           PetscInt cpid = idx[j];
469           NState statej = (NState)PetscRealPart(cpcol_1_state[cpid]);
470           if( IS_SELECTED(statej) && sgidold != (PetscInt)statej ) { /* ghost will steal this, remove from my list */
471             deleted_parent_gid[lid] = (PetscScalar)statej; /* send who selected with _2 */
472             if( sgidold>=my0 && sgidold<(my0+nloc) ) { /* this was mine */
473               PetscInt lastid,hav=0,flid,oldslidj=sgidold-my0;
474               /* remove from 'oldslidj' list, local so _2 is OK */
475               for( lastid=oldslidj, flid=id_llist_2[oldslidj] ; flid != -1 ; flid=id_llist_2[flid] ) {
476                 if( flid == lid ) {
477                   id_llist_2[lastid] = id_llist_2[flid];   /* remove lid from oldslidj list */
478                   hav++;
479                   break;
480                 }
481                 lastid = flid;
482               }
483               if(hav!=1){
484                 if(hav==0)SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
485                 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"found node %d times???",hav);
486               }
487               id_llist_2[lid] = -1; /* terminate linked list - needed? */
488             }
489             else assert(id_llist_2[lid] == -1);
490           }
491         }
492       }
493     } /* selected/deleted */
494     else assert(state == REMOVED || !lid_cprowID_1);
495   } /* node loop */
496 
497   if( isMPI ) {
498     PetscScalar *cpcol_2_sel_gid;
499     Vec          tempVec;
500     PetscInt     cpid;
501 
502     ierr = VecRestoreArray( mpimat_1->lvec, &cpcol_1_state ); CHKERRQ(ierr);
503     ierr = VecRestoreArray( mpimat_2->lvec, &cpcol_2_state ); CHKERRQ(ierr);
504 
505     /* get 'cpcol_2_sel_gid' */
506     ierr = MatGetVecs( Gmat_2, &tempVec, 0 );         CHKERRQ(ierr);
507     for(kk=0,j=my0;kk<nloc;kk++,j++){
508       ierr = VecSetValues( tempVec, 1, &j, &deleted_parent_gid[kk], INSERT_VALUES );  CHKERRQ(ierr);
509     }
510     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
511     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
512     ierr = VecScatterBegin(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
513     CHKERRQ(ierr);
514     ierr =   VecScatterEnd(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
515     CHKERRQ(ierr);
516     ierr = VecDestroy( &tempVec ); CHKERRQ(ierr);
517 
518     ierr = VecGetArray( mpimat_2->lvec, &cpcol_2_sel_gid ); CHKERRQ(ierr);
519 
520     /* look for deleted ghosts and see if they moved */
521     for(lid=0;lid<nloc;lid++){
522       NState state = lid_state[lid];
523       if( IS_SELECTED(state) ){
524         PetscInt flid,lastid,old_sgid=lid+my0;
525         /* look for deleted ghosts and see if they moved */
526         for( lastid=lid, flid=id_llist_2[lid] ; flid!=-1 ; flid=id_llist_2[flid] ) {
527           if( flid>=nloc ) {
528             PetscInt cpid = flid-nloc, sgid_new = (PetscInt)PetscRealPart(cpcol_2_sel_gid[cpid]);
529             if( sgid_new != old_sgid && sgid_new != -1 ) {
530               id_llist_2[lastid] = id_llist_2[flid];                    /* remove 'flid' from list */
531               id_llist_2[flid] = -1;
532               flid = lastid;
533             } /* if it changed parents */
534             else lastid = flid;
535           } /* for ghost nodes */
536           else lastid = flid;
537         } /* loop over list of deleted */
538       } /* selected */
539     }
540 
541     /* look at ghosts, see if they changed, and moved here */
542     for(cpid=0;cpid<nnodes_2-nloc;cpid++){
543       PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_sel_gid[cpid]);
544       if( sgid_new>=my0 && sgid_new<(my0+nloc) ) { /* this is mine */
545         PetscInt lastid,flid,slid_new=sgid_new-my0,flidj=nloc+cpid,hav=0;
546         for( lastid=slid_new, flid=id_llist_2[slid_new] ; flid != -1 ; flid=id_llist_2[flid] ) {
547           if( flid == flidj ) {
548             hav++;
549             break;
550           }
551           lastid = flid;
552         }
553         if( hav != 1 ){
554           assert(id_llist_2[flidj] == -1);
555           id_llist_2[flidj] = id_llist_2[slid_new]; id_llist_2[slid_new] = flidj; /* insert 'flidj' into head of llist */
556         }
557       }
558     }
559 
560     ierr = VecRestoreArray( mpimat_2->lvec, &cpcol_2_sel_gid ); CHKERRQ(ierr);
561     ierr = PetscFree( lid_cprowID_1 );  CHKERRQ(ierr);
562     ierr = PetscFree( lid_cprowID_2 );  CHKERRQ(ierr);
563   }
564 
565   /* copy out new aggs */
566   ierr = ISGeneralSetIndices(llist_aggs_2, nnodes_2, id_llist_2, PETSC_COPY_VALUES ); CHKERRQ(ierr);
567 
568   ierr = PetscFree( id_llist_2 );  CHKERRQ(ierr);
569   ierr = PetscFree( deleted_parent_gid );  CHKERRQ(ierr);
570   ierr = PetscFree( lid_state );  CHKERRQ(ierr);
571 
572   PetscFunctionReturn(0);
573 }
574 
575 /* -------------------------------------------------------------------------- */
576 /*
577    PCSetData_AGG
578 
579   Input Parameter:
580    . pc -
581 */
582 #undef __FUNCT__
583 #define __FUNCT__ "PCSetData_AGG"
584 PetscErrorCode PCSetData_AGG( PC pc )
585 {
586   PetscErrorCode  ierr;
587   PetscFunctionBegin;
588   ierr = PCSetCoordinates_AGG( pc, -1, PETSC_NULL ); CHKERRQ(ierr);
589   PetscFunctionReturn(0);
590 }
591 
592 /* -------------------------------------------------------------------------- */
593 /*
594  formProl0
595 
596    Input Parameter:
597    . selected - list of selected local ID, includes selected ghosts
598    . locals_llist - linked list with aggregates
599    . bs - block size
600    . nSAvec - num columns of new P
601    . my0crs - global index of locals
602    . data_stride - bs*(nloc nodes + ghost nodes)
603    . data_in[data_stride*nSAvec] - local data on fine grid
604    . flid_fgid[data_stride/bs] - make local to global IDs, includes ghosts in 'locals_llist'
605   Output Parameter:
606    . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data
607    . a_Prol - prolongation operator
608 */
609 #undef __FUNCT__
610 #define __FUNCT__ "formProl0"
611 PetscErrorCode formProl0( IS selected, /* list of selected local ID, includes selected ghosts */
612                           IS locals_llist, /* linked list from selected vertices of aggregate unselected vertices */
613                           const PetscInt bs,
614                           const PetscInt nSAvec,
615                           const PetscInt my0crs,
616                           const PetscInt data_stride,
617                           PetscReal data_in[],
618                           const PetscInt flid_fgid[],
619                           PetscReal **a_data_out,
620                           Mat a_Prol /* prolongation operator (output)*/
621                           )
622 {
623   PetscErrorCode ierr;
624   PetscInt  Istart,Iend,nFineLoc,clid,flid,aggID,kk,jj,ii,mm,nLocalSelected,ndone,nSelected,minsz;
625   MPI_Comm       wcomm = ((PetscObject)a_Prol)->comm;
626   PetscMPIInt    mype, npe;
627   const PetscInt *selected_idx,*llist_idx;
628   PetscReal      *out_data;
629 /* #define OUT_AGGS */
630 #ifdef OUT_AGGS
631   static PetscInt llev = 0; char fname[32]; FILE *file;
632   sprintf(fname,"aggs_%d.m",llev++);
633   if(llev==1) {
634     file = fopen(fname,"w");
635     fprintf(file,"figure,\n");
636   }
637 #endif
638 
639   PetscFunctionBegin;
640   ierr = MPI_Comm_rank(wcomm,&mype);CHKERRQ(ierr);
641   ierr = MPI_Comm_size(wcomm,&npe);CHKERRQ(ierr);
642   ierr = MatGetOwnershipRange( a_Prol, &Istart, &Iend );    CHKERRQ(ierr);
643   nFineLoc = (Iend-Istart)/bs; assert((Iend-Istart)%bs==0);
644 
645   ierr = ISGetSize( selected, &nSelected );        CHKERRQ(ierr);
646   ierr = ISGetIndices( selected, &selected_idx );       CHKERRQ(ierr);
647   ierr = ISGetIndices( locals_llist, &llist_idx );      CHKERRQ(ierr);
648   for(kk=0,nLocalSelected=0;kk<nSelected;kk++){
649     PetscInt lid = selected_idx[kk];
650     if( lid<nFineLoc && llist_idx[lid] != -1 ) nLocalSelected++;
651   }
652 
653   /* aloc space for coarse point data (output) */
654 #define DATA_OUT_STRIDE (nLocalSelected*nSAvec)
655   ierr = PetscMalloc( DATA_OUT_STRIDE*nSAvec*sizeof(PetscReal), &out_data ); CHKERRQ(ierr);
656   for(ii=0;ii<DATA_OUT_STRIDE*nSAvec;ii++) out_data[ii]=1.e300;
657   *a_data_out = out_data; /* output - stride nLocalSelected*nSAvec */
658 
659   /* find points and set prolongation */
660   minsz = 100;
661   ndone = 0;
662   for( mm = clid = 0 ; mm < nSelected ; mm++ ){
663     PetscInt lid = selected_idx[mm];
664     PetscInt cgid = my0crs + clid, cids[100];
665 
666     if( lid>=nFineLoc || llist_idx[lid]==-1 ) continue; /* skip ghost or singleton */
667 
668     /* count agg */
669     aggID = 0;
670     flid = selected_idx[mm]; assert(flid != -1);
671     do{
672       aggID++;
673     } while( (flid=llist_idx[flid]) != -1 );
674     if( aggID<minsz ) minsz = aggID;
675 
676     /* get block */
677     {
678       PetscBLASInt   asz=aggID,M=asz*bs,N=nSAvec,INFO;
679       PetscBLASInt   Mdata=M+((N-M>0)?N-M:0),LDA=Mdata,LWORK=N*bs;
680       PetscScalar    *qqc,*qqr,*TAU,*WORK;
681       PetscInt       *fids;
682 
683       ierr = PetscMalloc( (Mdata*N)*sizeof(PetscScalar), &qqc ); CHKERRQ(ierr);
684       ierr = PetscMalloc( (M*N)*sizeof(PetscScalar), &qqr ); CHKERRQ(ierr);
685       ierr = PetscMalloc( N*sizeof(PetscScalar), &TAU ); CHKERRQ(ierr);
686       ierr = PetscMalloc( LWORK*sizeof(PetscScalar), &WORK ); CHKERRQ(ierr);
687       ierr = PetscMalloc( M*sizeof(PetscInt), &fids ); CHKERRQ(ierr);
688 
689       flid = selected_idx[mm];
690       aggID = 0;
691       do{
692         /* copy in B_i matrix - column oriented */
693         PetscReal *data = &data_in[flid*bs];
694         for( kk = ii = 0; ii < bs ; ii++ ) {
695           for( jj = 0; jj < N ; jj++ ) {
696             qqc[jj*Mdata + aggID*bs + ii] = data[jj*data_stride + ii];
697           }
698         }
699 #ifdef OUT_AGGS
700         if(llev==1) {
701           char str[] = "plot(%e,%e,'r*'), hold on,\n", col[] = "rgbkmc", sim[] = "*os+h>d<vx^";
702           PetscInt M,pi,pj,gid=Istart+flid;
703           str[12] = col[clid%6]; str[13] = sim[(clid/6)%11];
704           M = (PetscInt)(PetscSqrtScalar((PetscScalar)nFineLoc*npe));
705           pj = gid/M; pi = gid%M;
706           fprintf(file,str,(double)pi,(double)pj);
707           /* fprintf(file,str,data[2*data_stride+1],-data[2*data_stride]); */
708         }
709 #endif
710         /* set fine IDs */
711         for(kk=0;kk<bs;kk++) fids[aggID*bs + kk] = flid_fgid[flid]*bs + kk;
712 
713         aggID++;
714       }while( (flid=llist_idx[flid]) != -1 );
715 
716       /* pad with zeros */
717       for( ii = asz*bs; ii < Mdata ; ii++ ) {
718 	for( jj = 0; jj < N ; jj++, kk++ ) {
719 	  qqc[jj*Mdata + ii] = .0;
720 	}
721       }
722 
723       ndone += aggID;
724       /* QR */
725       LAPACKgeqrf_( &Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
726       if( INFO != 0 ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRS error");
727       /* get R - column oriented - output B_{i+1} */
728       {
729         PetscReal *data = &out_data[clid*nSAvec];
730         for( jj = 0; jj < nSAvec ; jj++ ) {
731           for( ii = 0; ii < nSAvec ; ii++ ) {
732             assert(data[jj*DATA_OUT_STRIDE + ii] == 1.e300);
733             if( ii <= jj ) data[jj*DATA_OUT_STRIDE + ii] = PetscRealPart(qqc[jj*Mdata + ii]);
734 	    else data[jj*DATA_OUT_STRIDE + ii] = 0.;
735           }
736         }
737       }
738 
739       /* get Q - row oriented */
740       LAPACKungqr_( &Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
741       if( INFO != 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR error arg %d",-INFO);
742 
743       for( ii = 0 ; ii < M ; ii++ ){
744         for( jj = 0 ; jj < N ; jj++ ) {
745           qqr[N*ii + jj] = qqc[jj*Mdata + ii];
746         }
747       }
748 
749       /* add diagonal block of P0 */
750       for(kk=0;kk<N;kk++) {
751         cids[kk] = N*cgid + kk; /* global col IDs in P0 */
752       }
753       ierr = MatSetValues(a_Prol,M,fids,N,cids,qqr,INSERT_VALUES); CHKERRQ(ierr);
754 
755       ierr = PetscFree( qqc );  CHKERRQ(ierr);
756       ierr = PetscFree( qqr );  CHKERRQ(ierr);
757       ierr = PetscFree( TAU );  CHKERRQ(ierr);
758       ierr = PetscFree( WORK );  CHKERRQ(ierr);
759       ierr = PetscFree( fids );  CHKERRQ(ierr);
760     } /* scoping */
761     clid++;
762   } /* for all coarse nodes */
763 
764 /* ierr = MPI_Allreduce( &ndone, &ii, 1, MPIU_INT, MPIU_SUM, wcomm ); */
765 /* MatGetSize( a_Prol, &kk, &jj ); */
766 /* ierr = MPI_Allreduce( &minsz, &jj, 1, MPIU_INT, MPIU_MIN, wcomm ); */
767 /* PetscPrintf(PETSC_COMM_WORLD," **** [%d]%s %d total done, N=%d (%d local done), min agg. size = %d\n",mype,__FUNCT__,ii,kk/bs,ndone,jj); */
768 
769 #ifdef OUT_AGGS
770   if(llev==1) fclose(file);
771 #endif
772   ierr = ISRestoreIndices( selected, &selected_idx );     CHKERRQ(ierr);
773   ierr = ISRestoreIndices( locals_llist, &llist_idx );     CHKERRQ(ierr);
774   ierr = MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
775   ierr = MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
776 
777   PetscFunctionReturn(0);
778 }
779 
780 /* -------------------------------------------------------------------------- */
781 /*
782    PCGAMGgraph_AGG
783 
784   Input Parameter:
785    . pc - this
786    . Amat - matrix on this fine level
787   Output Parameter:
788    . a_Gmat -
789 */
790 #undef __FUNCT__
791 #define __FUNCT__ "PCGAMGgraph_AGG"
792 PetscErrorCode PCGAMGgraph_AGG( PC pc,
793                                 const Mat Amat,
794                                 Mat *a_Gmat
795                                 )
796 {
797   PetscErrorCode ierr;
798   PC_MG          *mg = (PC_MG*)pc->data;
799   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
800   const PetscInt verbose = pc_gamg->verbose;
801   const PetscReal vfilter = pc_gamg->threshold;
802   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
803   PetscMPIInt    mype,npe;
804   Mat            Gmat, Gmat2;
805   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
806 
807   PetscFunctionBegin;
808   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
809   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
810 
811   ierr  = createSimpleGraph( Amat, &Gmat ); CHKERRQ( ierr );
812 
813   ierr  = scaleFilterGraph( &Gmat, vfilter, pc_gamg_agg->sym_graph, verbose ); CHKERRQ( ierr );
814 
815   ierr = MatTransposeMatMult( Gmat, Gmat, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Gmat2 );
816   CHKERRQ(ierr);
817 
818   /* attach auxilary matrix */
819   pc_gamg_agg->aux_mat = Gmat;
820 
821   *a_Gmat = Gmat2;
822 
823   PetscFunctionReturn(0);
824 }
825 
826 /* -------------------------------------------------------------------------- */
827 /*
828    PCGAMGCoarsen_AGG
829 
830   Input Parameter:
831    . pc - this
832    . Gmat2 - matrix on this fine level
833   Output Parameter:
834    . a_selected - prolongation operator to the next level
835    . a_llist_parent - data of coarse grid points (num local columns in 'a_P_out')
836 */
837 #undef __FUNCT__
838 #define __FUNCT__ "PCGAMGCoarsen_AGG"
839 PetscErrorCode PCGAMGCoarsen_AGG( PC pc,
840                                   const Mat Gmat2,
841                                   IS *a_selected,
842                                   IS *a_llist_parent
843                                   )
844 {
845   PetscErrorCode ierr;
846   PC_MG          *mg = (PC_MG*)pc->data;
847   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
848   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
849   Mat             Gmat1; /* unsquared graph (not symetrized!) */
850   IS              perm, selected, llist_parent;
851   PetscInt        Ii,nloc,bs,n,m;
852   PetscInt *permute;
853   PetscBool *bIndexSet;
854   MatCoarsen crs;
855   MPI_Comm        wcomm = ((PetscObject)Gmat2)->comm;
856   /* PetscMPIInt     mype,npe; */
857 
858   PetscFunctionBegin;
859   /* ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr); */
860   /* ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr); */
861   ierr = MatGetLocalSize( Gmat2, &n, &m ); CHKERRQ(ierr);
862   ierr = MatGetBlockSize( Gmat2, &bs ); CHKERRQ(ierr); assert(bs==1);
863   nloc = n/bs;
864 
865   /* get unsquared graph */
866   Gmat1 = pc_gamg_agg->aux_mat; pc_gamg_agg->aux_mat = 0;
867 
868   /* get MIS aggs */
869   /* randomize */
870   ierr = PetscMalloc( nloc*sizeof(PetscInt), &permute ); CHKERRQ(ierr);
871   ierr = PetscMalloc( nloc*sizeof(PetscBool), &bIndexSet ); CHKERRQ(ierr);
872   for ( Ii = 0; Ii < nloc ; Ii++ ){
873     bIndexSet[Ii] = PETSC_FALSE;
874     permute[Ii] = Ii;
875   }
876   srand(1); /* make deterministic */
877   for ( Ii = 0; Ii < nloc ; Ii++ ) {
878     PetscInt iSwapIndex = rand()%nloc;
879     if (!bIndexSet[iSwapIndex] && iSwapIndex != Ii) {
880       PetscInt iTemp = permute[iSwapIndex];
881       permute[iSwapIndex] = permute[Ii];
882       permute[Ii] = iTemp;
883       bIndexSet[iSwapIndex] = PETSC_TRUE;
884     }
885   }
886   ierr = PetscFree( bIndexSet );  CHKERRQ(ierr);
887 
888   ierr = ISCreateGeneral(PETSC_COMM_SELF, nloc, permute, PETSC_USE_POINTER, &perm);
889   CHKERRQ(ierr);
890 #if defined PETSC_USE_LOG
891   ierr = PetscLogEventBegin(gamg_setup_events[SET4],0,0,0,0);CHKERRQ(ierr);
892 #endif
893   ierr = MatCoarsenCreate( wcomm, &crs ); CHKERRQ(ierr);
894   /* ierr = MatCoarsenSetType( crs, MATCOARSENMIS ); CHKERRQ(ierr); */
895   ierr = MatCoarsenSetFromOptions( crs ); CHKERRQ(ierr);
896   ierr = MatCoarsenSetGreedyOrdering( crs, perm ); CHKERRQ(ierr);
897   ierr = MatCoarsenSetAdjacency( crs, Gmat2 ); CHKERRQ(ierr);
898   ierr = MatCoarsenSetVerbose( crs, pc_gamg->verbose ); CHKERRQ(ierr);
899   ierr = MatCoarsenSetStrictAggs( crs, PETSC_TRUE ); CHKERRQ(ierr);
900   ierr = MatCoarsenApply( crs ); CHKERRQ(ierr);
901   ierr = MatCoarsenGetMISAggLists( crs, &selected, &llist_parent ); CHKERRQ(ierr);
902   ierr = MatCoarsenDestroy( &crs ); CHKERRQ(ierr);
903 
904   ierr = ISDestroy( &perm );                    CHKERRQ(ierr);
905   ierr = PetscFree( permute );  CHKERRQ(ierr);
906 #if defined PETSC_USE_LOG
907   ierr = PetscLogEventEnd(gamg_setup_events[SET4],0,0,0,0);CHKERRQ(ierr);
908 #endif
909   /* smooth aggs */
910   ierr = smoothAggs( Gmat2, Gmat1, selected, llist_parent ); CHKERRQ(ierr);
911 
912   ierr = MatDestroy( &Gmat1 );  CHKERRQ(ierr);
913 
914   *a_selected = selected;
915   *a_llist_parent = llist_parent;
916 
917   PetscFunctionReturn(0);
918 }
919 
920 /* -------------------------------------------------------------------------- */
921 /*
922  PCGAMGprolongator_AGG
923 
924  Input Parameter:
925  . pc - this
926  . Amat - matrix on this fine level
927  . Graph - used to get ghost data for nodes in
928  . selected - [nselected inc. chosts]
929  . llist_parent - [nloc + Gmat.nghost] linked list
930  Output Parameter:
931  . a_P_out - prolongation operator to the next level
932  */
933 #undef __FUNCT__
934 #define __FUNCT__ "PCGAMGprolongator_AGG"
935 PetscErrorCode PCGAMGprolongator_AGG( PC pc,
936                                      const Mat Amat,
937                                      const Mat Gmat,
938                                      IS selected,
939                                      IS llist_parent,
940                                      Mat *a_P_out
941                                      )
942 {
943   PC_MG          *mg = (PC_MG*)pc->data;
944   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
945   const PetscInt verbose = pc_gamg->verbose;
946   const PetscInt data_cols = pc_gamg->data_cell_cols;
947   PetscErrorCode ierr;
948   PetscInt       Istart,Iend,nloc,ii,jj,kk,my0,nLocalSelected,bs;
949   Mat            Prol;
950   PetscMPIInt    mype, npe;
951   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
952   const PetscInt *selected_idx,*llist_idx,col_bs=data_cols;
953   PetscReal      *data_w_ghost;
954   PetscInt       myCrs0, nbnodes=0, *flid_fgid;
955 
956   PetscFunctionBegin;
957   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
958   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
959   ierr = MatGetOwnershipRange( Amat, &Istart, &Iend ); CHKERRQ(ierr);
960   ierr  = MatGetBlockSize( Amat, &bs ); CHKERRQ( ierr );
961   nloc = (Iend-Istart)/bs; my0 = Istart/bs; assert((Iend-Istart)%bs==0);
962 
963   /* get 'nLocalSelected' */
964   ierr = ISGetSize( selected, &kk );        CHKERRQ(ierr);
965   ierr = ISGetIndices( selected, &selected_idx );     CHKERRQ(ierr);
966   ierr = ISGetIndices( llist_parent, &llist_idx );     CHKERRQ(ierr);
967   for(ii=0,nLocalSelected=0;ii<kk;ii++){
968     PetscInt lid = selected_idx[ii];
969     /* filter out singletons */
970     if( lid<nloc && llist_idx[lid] != -1) nLocalSelected++;
971   }
972   ierr = ISRestoreIndices( selected, &selected_idx );     CHKERRQ(ierr);
973   ierr = ISRestoreIndices( llist_parent, &llist_idx );     CHKERRQ(ierr);
974 
975   /* create prolongator, create P matrix */
976   ierr = MatCreateMPIAIJ( wcomm,
977                           nloc*bs, nLocalSelected*col_bs,
978                           PETSC_DETERMINE, PETSC_DETERMINE,
979                           data_cols, PETSC_NULL, data_cols, PETSC_NULL,
980                           &Prol );
981   CHKERRQ(ierr);
982 
983   /* can get all points "removed" */
984   ierr =  MatGetSize( Prol, &kk, &ii ); CHKERRQ(ierr);
985   if( ii==0 ) {
986     if( verbose ) {
987       PetscPrintf(wcomm,"[%d]%s no selected points on coarse grid\n",mype,__FUNCT__);
988     }
989     ierr = MatDestroy( &Prol );  CHKERRQ(ierr);
990     *a_P_out = PETSC_NULL;  /* out */
991     PetscFunctionReturn(0);
992   }
993   if( verbose ) {
994     PetscPrintf(PETSC_COMM_WORLD,"\t\t[%d]%s New grid %d nodes\n",mype,__FUNCT__,ii/bs);
995   }
996   ierr = MatGetOwnershipRangeColumn( Prol, &myCrs0, &kk ); CHKERRQ(ierr);
997   myCrs0 = myCrs0/col_bs;
998 
999   /* create global vector of data in 'data_w_ghost' */
1000 #if defined PETSC_USE_LOG
1001   ierr = PetscLogEventBegin(gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr);
1002 #endif
1003   if (npe > 1) { /*  */
1004     PetscReal *tmp_gdata,*tmp_ldata,*tp2;
1005     ierr = PetscMalloc( nloc*sizeof(PetscReal), &tmp_ldata ); CHKERRQ(ierr);
1006     for( jj = 0 ; jj < data_cols ; jj++ ){
1007       for( kk = 0 ; kk < bs ; kk++) {
1008         PetscInt ii,nnodes;
1009         const PetscReal *tp = pc_gamg->data + jj*bs*nloc + kk;
1010         for( ii = 0 ; ii < nloc ; ii++, tp += bs ){
1011           tmp_ldata[ii] = *tp;
1012         }
1013         ierr = getDataWithGhosts( Gmat, 1, tmp_ldata, &nnodes, &tmp_gdata );
1014         CHKERRQ(ierr);
1015         if(jj==0 && kk==0) { /* now I know how many todal nodes - allocate */
1016           ierr = PetscMalloc( nnodes*bs*data_cols*sizeof(PetscReal), &data_w_ghost ); CHKERRQ(ierr);
1017           nbnodes = bs*nnodes;
1018         }
1019         tp2 = data_w_ghost + jj*bs*nnodes + kk;
1020         for( ii = 0 ; ii < nnodes ; ii++, tp2 += bs ){
1021           *tp2 = tmp_gdata[ii];
1022         }
1023         ierr = PetscFree( tmp_gdata ); CHKERRQ(ierr);
1024       }
1025     }
1026     ierr = PetscFree( tmp_ldata ); CHKERRQ(ierr);
1027   }
1028   else {
1029     nbnodes = bs*nloc;
1030     data_w_ghost = (PetscReal*)pc_gamg->data;
1031   }
1032 
1033   /* get P0 */
1034   if( npe > 1 ){
1035     PetscReal *fid_glid_loc,*fiddata;
1036     PetscInt nnodes;
1037 
1038     ierr = PetscMalloc( nloc*sizeof(PetscReal), &fid_glid_loc ); CHKERRQ(ierr);
1039     for(kk=0;kk<nloc;kk++) fid_glid_loc[kk] = (PetscReal)(my0+kk);
1040     ierr = getDataWithGhosts( Gmat, 1, fid_glid_loc, &nnodes, &fiddata );
1041     CHKERRQ(ierr);
1042     ierr = PetscMalloc( nnodes*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr);
1043     for(kk=0;kk<nnodes;kk++) flid_fgid[kk] = (PetscInt)fiddata[kk];
1044     ierr = PetscFree( fiddata ); CHKERRQ(ierr);
1045     assert(nnodes==nbnodes/bs);
1046     ierr = PetscFree( fid_glid_loc ); CHKERRQ(ierr);
1047   }
1048   else {
1049     ierr = PetscMalloc( nloc*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr);
1050     for(kk=0;kk<nloc;kk++) flid_fgid[kk] = my0 + kk;
1051   }
1052 #if defined PETSC_USE_LOG
1053   ierr = PetscLogEventEnd(gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr);
1054   ierr = PetscLogEventBegin(gamg_setup_events[SET8],0,0,0,0);CHKERRQ(ierr);
1055 #endif
1056   {
1057     PetscReal *data_out;
1058     ierr = formProl0( selected, llist_parent, bs, data_cols, myCrs0, nbnodes,
1059                       data_w_ghost, flid_fgid, &data_out, Prol );
1060     CHKERRQ(ierr);
1061     ierr = PetscFree( pc_gamg->data ); CHKERRQ( ierr );
1062     pc_gamg->data = data_out;
1063     pc_gamg->data_cell_rows = data_cols;
1064     pc_gamg->data_sz = data_cols*data_cols*nLocalSelected;
1065   }
1066  #if defined PETSC_USE_LOG
1067   ierr = PetscLogEventEnd(gamg_setup_events[SET8],0,0,0,0);CHKERRQ(ierr);
1068 #endif
1069   if (npe > 1) ierr = PetscFree( data_w_ghost );      CHKERRQ(ierr);
1070   ierr = PetscFree( flid_fgid ); CHKERRQ(ierr);
1071 
1072   /* attach block size of columns */
1073   if( pc_gamg->col_bs_id == -1 ) {
1074     ierr = PetscObjectComposedDataRegister( &pc_gamg->col_bs_id ); assert(pc_gamg->col_bs_id != -1 );
1075   }
1076   ierr = PetscObjectComposedDataSetInt( (PetscObject)Prol, pc_gamg->col_bs_id, data_cols ); CHKERRQ(ierr);
1077 
1078   *a_P_out = Prol;  /* out */
1079 
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 /* -------------------------------------------------------------------------- */
1084 /*
1085    PCGAMGoptprol_AGG
1086 
1087   Input Parameter:
1088    . pc - this
1089    . Amat - matrix on this fine level
1090  In/Output Parameter:
1091    . a_P_out - prolongation operator to the next level
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "PCGAMGoptprol_AGG"
1095 PetscErrorCode PCGAMGoptprol_AGG( PC pc,
1096                                   const Mat Amat,
1097                                   Mat *a_P
1098                                   )
1099 {
1100   PetscErrorCode ierr;
1101   PC_MG          *mg = (PC_MG*)pc->data;
1102   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1103   const PetscInt verbose = pc_gamg->verbose;
1104   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
1105   PetscInt       jj;
1106   PetscMPIInt    mype,npe;
1107   Mat            Prol = *a_P;
1108   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
1109 
1110   PetscFunctionBegin;
1111   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
1112   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
1113 
1114   /* smooth P0 */
1115   for( jj = 0 ; jj < pc_gamg_agg->nsmooths ; jj++ ){
1116     Mat tMat;
1117     Vec diag;
1118     PetscReal alpha, emax, emin;
1119 #if defined PETSC_USE_LOG
1120     ierr = PetscLogEventBegin(gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr);
1121 #endif
1122     if( jj == 0 ) {
1123       KSP eksp;
1124       Vec bb, xx;
1125       PC pc;
1126       ierr = MatGetVecs( Amat, &bb, 0 );         CHKERRQ(ierr);
1127       ierr = MatGetVecs( Amat, &xx, 0 );         CHKERRQ(ierr);
1128       {
1129         PetscRandom    rctx;
1130         ierr = PetscRandomCreate(wcomm,&rctx);CHKERRQ(ierr);
1131         ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
1132         ierr = VecSetRandom(bb,rctx);CHKERRQ(ierr);
1133         ierr = PetscRandomDestroy( &rctx ); CHKERRQ(ierr);
1134       }
1135       ierr = KSPCreate(wcomm,&eksp);                            CHKERRQ(ierr);
1136       ierr = KSPAppendOptionsPrefix( eksp, "est_");         CHKERRQ(ierr);
1137       ierr = KSPSetFromOptions( eksp );    CHKERRQ(ierr);
1138       ierr = KSPSetInitialGuessNonzero( eksp, PETSC_FALSE );    CHKERRQ(ierr);
1139       ierr = KSPSetOperators( eksp, Amat, Amat, SAME_NONZERO_PATTERN );
1140       CHKERRQ( ierr );
1141       ierr = KSPGetPC( eksp, &pc );                              CHKERRQ( ierr );
1142       ierr = PCSetType( pc, PCJACOBI ); CHKERRQ(ierr);  /* smoother */
1143       ierr = KSPSetTolerances(eksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,10);
1144       CHKERRQ(ierr);
1145       ierr = KSPSetNormType( eksp, KSP_NORM_NONE );                 CHKERRQ(ierr);
1146       ierr = KSPSetComputeSingularValues( eksp,PETSC_TRUE );        CHKERRQ(ierr);
1147 
1148       /* solve - keep stuff out of logging */
1149       ierr = PetscLogEventDeactivate(KSP_Solve);CHKERRQ(ierr);
1150       ierr = PetscLogEventDeactivate(PC_Apply);CHKERRQ(ierr);
1151       ierr = KSPSolve( eksp, bb, xx );                              CHKERRQ(ierr);
1152       ierr = PetscLogEventActivate(KSP_Solve);CHKERRQ(ierr);
1153       ierr = PetscLogEventActivate(PC_Apply);CHKERRQ(ierr);
1154 
1155       ierr = KSPComputeExtremeSingularValues( eksp, &emax, &emin ); CHKERRQ(ierr);
1156       if( verbose ) {
1157         PetscPrintf(wcomm,"\t\t\t%s smooth P0: max eigen=%e min=%e PC=%s\n",
1158                     __FUNCT__,emax,emin,PCJACOBI);
1159       }
1160       ierr = VecDestroy( &xx );       CHKERRQ(ierr);
1161       ierr = VecDestroy( &bb );       CHKERRQ(ierr);
1162       ierr = KSPDestroy( &eksp );     CHKERRQ(ierr);
1163 
1164       if( pc_gamg->emax_id == -1 ) {
1165         ierr = PetscObjectComposedDataRegister( &pc_gamg->emax_id );
1166         assert(pc_gamg->emax_id != -1 );
1167       }
1168       ierr = PetscObjectComposedDataSetScalar( (PetscObject)Amat, pc_gamg->emax_id, emax ); CHKERRQ(ierr);
1169     }
1170 
1171     /* smooth P1 := (I - omega/lam D^{-1}A)P0 */
1172     ierr = MatMatMult( Amat, Prol, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tMat );   CHKERRQ(ierr);
1173     ierr = MatGetVecs( Amat, &diag, 0 );    CHKERRQ(ierr);
1174     ierr = MatGetDiagonal( Amat, diag );    CHKERRQ(ierr); /* effectively PCJACOBI */
1175     ierr = VecReciprocal( diag );         CHKERRQ(ierr);
1176     ierr = MatDiagonalScale( tMat, diag, 0 ); CHKERRQ(ierr);
1177     ierr = VecDestroy( &diag );           CHKERRQ(ierr);
1178     alpha = -1.5/emax;
1179     ierr = MatAYPX( tMat, alpha, Prol, SUBSET_NONZERO_PATTERN );           CHKERRQ(ierr);
1180     ierr = MatDestroy( &Prol );  CHKERRQ(ierr);
1181     Prol = tMat;
1182 #if defined PETSC_USE_LOG
1183     ierr = PetscLogEventEnd(gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr);
1184 #endif
1185   }
1186 
1187   *a_P = Prol;
1188 
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 /* -------------------------------------------------------------------------- */
1193 /*
1194    PCCreateGAMG_AGG
1195 
1196   Input Parameter:
1197    . pc -
1198 */
1199 #undef __FUNCT__
1200 #define __FUNCT__ "PCCreateGAMG_AGG"
1201 PetscErrorCode  PCCreateGAMG_AGG( PC pc )
1202 {
1203   PetscErrorCode  ierr;
1204   PC_MG           *mg = (PC_MG*)pc->data;
1205   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
1206   PC_GAMG_AGG      *pc_gamg_agg;
1207 
1208   PetscFunctionBegin;
1209   /* create sub context for SA */
1210   ierr = PetscNewLog( pc, PC_GAMG_AGG, &pc_gamg_agg ); CHKERRQ(ierr);
1211   assert(!pc_gamg->subctx);
1212   pc_gamg->subctx = pc_gamg_agg;
1213 
1214   pc->ops->setfromoptions = PCSetFromOptions_GAMG_AGG;
1215   pc->ops->destroy        = PCDestroy_AGG;
1216   /* reset does not do anything; setup not virtual */
1217 
1218   /* set internal function pointers */
1219   pc_gamg->graph = PCGAMGgraph_AGG;
1220   pc_gamg->coarsen = PCGAMGCoarsen_AGG;
1221   pc_gamg->prolongator = PCGAMGprolongator_AGG;
1222   pc_gamg->optprol = PCGAMGoptprol_AGG;
1223 
1224   pc_gamg->createdefaultdata = PCSetData_AGG;
1225 
1226   ierr = PetscObjectComposeFunctionDynamic( (PetscObject)pc,
1227                                             "PCSetCoordinates_C",
1228                                             "PCSetCoordinates_AGG",
1229                                             PCSetCoordinates_AGG);
1230   PetscFunctionReturn(0);
1231 }
1232