xref: /petsc/src/ksp/pc/impls/gamg/agg.c (revision 74778d6c3e36e5aa800405c1381bdd58dfd20219)
1 /*
2  GAMG geometric-algebric multiogrid PC - Mark Adams 2011
3  */
4 
5 #include <../src/ksp/pc/impls/gamg/gamg.h>        /*I "petscpc.h" I*/
6 #include <private/kspimpl.h>
7 
8 #include <assert.h>
9 #include <petscblaslapack.h>
10 
11 typedef struct {
12   PetscInt nsmooths;
13   Mat aux_mat;
14   PetscBool sym_graph;
15 }PC_GAMG_AGG;
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "PCGAMGSetNSmooths"
19 /*@
20    PCGAMGSetNSmooths - Set number of smoothing steps (1 is typical)
21 
22    Not Collective on PC
23 
24    Input Parameters:
25 .  pc - the preconditioner context
26 
27    Options Database Key:
28 .  -pc_gamg_agg_nsmooths
29 
30    Level: intermediate
31 
32    Concepts: Aggregation AMG preconditioner
33 
34 .seealso: ()
35 @*/
36 PetscErrorCode PCGAMGSetNSmooths(PC pc, PetscInt n)
37 {
38   PetscErrorCode ierr;
39 
40   PetscFunctionBegin;
41   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
42   ierr = PetscTryMethod(pc,"PCGAMGSetNSmooths_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
43   PetscFunctionReturn(0);
44 }
45 
46 EXTERN_C_BEGIN
47 #undef __FUNCT__
48 #define __FUNCT__ "PCGAMGSetNSmooths_GAMG"
49 PetscErrorCode PCGAMGSetNSmooths_GAMG(PC pc, PetscInt n)
50 {
51   PC_MG           *mg = (PC_MG*)pc->data;
52   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
53   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
54 
55   PetscFunctionBegin;
56   pc_gamg_agg->nsmooths = n;
57   PetscFunctionReturn(0);
58 }
59 EXTERN_C_END
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "PCGAMGSetSymGraph"
63 /*@
64    PCGAMGSetSymGraph -
65 
66    Not Collective on PC
67 
68    Input Parameters:
69 .  pc - the preconditioner context
70 
71    Options Database Key:
72 .  -pc_gamg_sym_graph
73 
74    Level: intermediate
75 
76    Concepts: Aggregation AMG preconditioner
77 
78 .seealso: ()
79 @*/
80 PetscErrorCode PCGAMGSetSymGraph(PC pc, PetscBool n)
81 {
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
86   ierr = PetscTryMethod(pc,"PCGAMGSetSymGraph_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 EXTERN_C_BEGIN
91 #undef __FUNCT__
92 #define __FUNCT__ "PCGAMGSetSymGraph_GAMG"
93 PetscErrorCode PCGAMGSetSymGraph_GAMG(PC pc, PetscBool n)
94 {
95   PC_MG           *mg = (PC_MG*)pc->data;
96   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
97   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
98 
99   PetscFunctionBegin;
100   pc_gamg_agg->sym_graph = n;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 /* -------------------------------------------------------------------------- */
106 /*
107    PCSetFromOptions_GAMG_AGG
108 
109   Input Parameter:
110    . pc -
111 */
112 #undef __FUNCT__
113 #define __FUNCT__ "PCSetFromOptions_GAMG_AGG"
114 PetscErrorCode PCSetFromOptions_GAMG_AGG( PC pc )
115 {
116   PetscErrorCode  ierr;
117   PC_MG           *mg = (PC_MG*)pc->data;
118   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
119   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
120   PetscBool        flag;
121 
122   PetscFunctionBegin;
123   /* call base class */
124   ierr = PCSetFromOptions_GAMG( pc ); CHKERRQ(ierr);
125 
126   ierr = PetscOptionsHead("GAMG-AGG options"); CHKERRQ(ierr);
127   {
128     /* -pc_gamg_agg_nsmooths */
129     pc_gamg_agg->nsmooths = 0;
130     ierr = PetscOptionsInt("-pc_gamg_agg_nsmooths",
131                            "smoothing steps for smoothed aggregation, usually 1 (0)",
132                            "PCGAMGSetNSmooths",
133                            pc_gamg_agg->nsmooths,
134                            &pc_gamg_agg->nsmooths,
135                            &flag);
136     CHKERRQ(ierr);
137 
138     /* -pc_gamg_sym_graph */
139     pc_gamg_agg->sym_graph = PETSC_FALSE;
140     ierr = PetscOptionsBool("-pc_gamg_sym_graph",
141                             "Set for asymetric matrices",
142                             "PCGAMGSetSymGraph",
143                             pc_gamg_agg->sym_graph,
144                             &pc_gamg_agg->sym_graph,
145                             &flag);
146     CHKERRQ(ierr);
147   }
148   ierr = PetscOptionsTail();CHKERRQ(ierr);
149 
150   if( pc_gamg->verbose > 1 ) {
151     PetscPrintf(PETSC_COMM_WORLD,"[%d]%s done\n",0,__FUNCT__);
152   }
153 
154   PetscFunctionReturn(0);
155 }
156 
157 /* -------------------------------------------------------------------------- */
158 /*
159    PCDestroy_AGG
160 
161   Input Parameter:
162    . pc -
163 */
164 #undef __FUNCT__
165 #define __FUNCT__ "PCDestroy_AGG"
166 PetscErrorCode PCDestroy_AGG( PC pc )
167 {
168   PetscErrorCode  ierr;
169   PC_MG           *mg = (PC_MG*)pc->data;
170   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
171   PC_GAMG_AGG      *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
172 
173   PetscFunctionBegin;
174   if( pc_gamg_agg ) {
175     ierr = PetscFree(pc_gamg_agg);CHKERRQ(ierr);
176     pc_gamg_agg = 0;
177   }
178 
179   /* call base class */
180   ierr = PCDestroy_GAMG( pc );CHKERRQ(ierr);
181 
182   PetscFunctionReturn(0);
183 }
184 
185 /* -------------------------------------------------------------------------- */
186 /*
187    PCSetCoordinates_AGG
188 
189    Input Parameter:
190    .  pc - the preconditioner context
191 */
192 EXTERN_C_BEGIN
193 #undef __FUNCT__
194 #define __FUNCT__ "PCSetCoordinates_AGG"
195 PetscErrorCode PCSetCoordinates_AGG( PC pc, PetscInt ndm, PetscReal *coords )
196 {
197   PC_MG          *mg = (PC_MG*)pc->data;
198   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
199   PetscErrorCode ierr;
200   PetscInt       arrsz,bs,my0,kk,ii,jj,nloc,Iend;
201   Mat            Amat = pc->pmat;
202 
203   PetscFunctionBegin;
204   PetscValidHeaderSpecific( Amat, MAT_CLASSID, 1 );
205   ierr  = MatGetBlockSize( Amat, &bs );               CHKERRQ( ierr );
206   ierr  = MatGetOwnershipRange( Amat, &my0, &Iend ); CHKERRQ(ierr);
207   nloc = (Iend-my0)/bs;
208   if((Iend-my0)%bs!=0) SETERRQ1(((PetscObject)Amat)->comm,PETSC_ERR_ARG_WRONG, "Bad local size %d.",nloc);
209 
210   /* SA: null space vectors */
211   if( coords && bs==1 ) pc_gamg->data_cell_cols = 1; /* scalar w/ coords and SA (not needed) */
212   else if( coords ) pc_gamg->data_cell_cols = (ndm==2 ? 3 : 6); /* elasticity */
213   else pc_gamg->data_cell_cols = bs; /* no data, force SA with constant null space vectors */
214   pc_gamg->data_cell_rows = bs;
215 
216   arrsz = nloc*pc_gamg->data_cell_rows*pc_gamg->data_cell_cols;
217 
218   /* create data - syntactic sugar that should be refactored at some point */
219   if (pc_gamg->data==0 || (pc_gamg->data_sz != arrsz)) {
220     ierr = PetscFree( pc_gamg->data );  CHKERRQ(ierr);
221     ierr = PetscMalloc(arrsz*sizeof(PetscReal), &pc_gamg->data ); CHKERRQ(ierr);
222   }
223   /* copy data in - column oriented */
224   for(kk=0;kk<nloc;kk++){
225     const PetscInt M = Iend - my0;
226     PetscReal *data = &pc_gamg->data[kk*bs];
227     if( pc_gamg->data_cell_cols==1 ) *data = 1.0;
228     else {
229       for(ii=0;ii<bs;ii++)
230         for(jj=0;jj<bs;jj++)
231           if(ii==jj)data[ii*M + jj] = 1.0; /* translational modes */
232           else data[ii*M + jj] = 0.0;
233       if( coords ) {
234         if( ndm == 2 ){ /* rotational modes */
235           data += 2*M;
236           data[0] = -coords[2*kk+1];
237           data[1] =  coords[2*kk];
238         }
239         else {
240           data += 3*M;
241           data[0] = 0.0;               data[M+0] =  coords[3*kk+2]; data[2*M+0] = -coords[3*kk+1];
242           data[1] = -coords[3*kk+2]; data[M+1] = 0.0;               data[2*M+1] =  coords[3*kk];
243           data[2] =  coords[3*kk+1]; data[M+2] = -coords[3*kk];   data[2*M+2] = 0.0;
244         }
245       }
246     }
247   }
248 
249   pc_gamg->data_sz = arrsz;
250 
251   PetscFunctionReturn(0);
252 }
253 EXTERN_C_END
254 
255 /* -------------------------------------------------------------------------- */
256 /*
257    smoothAggs - greedy grab of with G1 (unsquared graph) -- AIJ specific!!!
258 
259    Input Parameter:
260    . Gmat_2 - glabal matrix of graph (data not defined)
261    . Gmat_1 - base graph to grab with
262    . selected_2 -
263    Input/Output Parameter:
264    . llist_aggs_2 - linked list of aggs, ghost lids are based on Gmat_2 (squared graph)
265 */
266 #undef __FUNCT__
267 #define __FUNCT__ "smoothAggs"
268 PetscErrorCode smoothAggs( const Mat Gmat_2, /* base (squared) graph */
269                            const Mat Gmat_1, /* base graph, could be unsymmetic */
270                            const IS selected_2, /* [nselected total] selected vertices */
271                            IS llist_aggs_2 /* [nloc_nghost] global ID of aggregate */
272                            )
273 {
274   PetscErrorCode ierr;
275   PetscBool      isMPI;
276   Mat_SeqAIJ    *matA_1, *matB_1=0, *matA_2, *matB_2=0;
277   MPI_Comm       wcomm = ((PetscObject)Gmat_2)->comm;
278   PetscMPIInt    mype;
279   PetscInt       lid,*ii,*idx,ix,Iend,my0,nSelected,nnodes_2,kk,n,j;
280   Mat_MPIAIJ    *mpimat_2 = 0, *mpimat_1=0;
281   const PetscInt nloc = Gmat_2->rmap->n;
282   PetscScalar   *cpcol_1_state,*cpcol_2_state,*deleted_parent_gid;
283   PetscInt      *lid_cprowID_1,*id_llist_2,*lid_cprowID_2;
284   NState        *lid_state;
285 
286   PetscFunctionBegin;
287   ierr = MPI_Comm_rank( wcomm, &mype );   CHKERRQ(ierr);
288   ierr = MatGetOwnershipRange(Gmat_1,&my0,&Iend);  CHKERRQ(ierr);
289 
290   if( !PETSC_TRUE ) {
291     PetscViewer viewer; char fname[32]; static int llev=0;
292     sprintf(fname,"Gmat2_%d.m",llev++);
293     PetscViewerASCIIOpen(wcomm,fname,&viewer);
294     ierr = PetscViewerSetFormat( viewer, PETSC_VIEWER_ASCII_MATLAB);  CHKERRQ(ierr);
295     ierr = MatView(Gmat_2, viewer ); CHKERRQ(ierr);
296     ierr = PetscViewerDestroy( &viewer );
297   }
298 
299   { /* copy linked list into temp buffer - should not work directly on pointer */
300     const PetscInt *llist_idx;
301     ierr = ISGetSize( llist_aggs_2, &nnodes_2 );        CHKERRQ(ierr);
302     ierr = PetscMalloc( nnodes_2*sizeof(PetscInt), &id_llist_2 ); CHKERRQ(ierr);
303     ierr = ISGetIndices( llist_aggs_2, &llist_idx );       CHKERRQ(ierr);
304     for(lid=0;lid<nnodes_2;lid++) id_llist_2[lid] = llist_idx[lid];
305     ierr = ISRestoreIndices( llist_aggs_2, &llist_idx );     CHKERRQ(ierr);
306   }
307 
308   /* get submatrices */
309   ierr = PetscTypeCompare( (PetscObject)Gmat_1, MATMPIAIJ, &isMPI ); CHKERRQ(ierr);
310   if(isMPI) {
311     /* grab matrix objects */
312     mpimat_2 = (Mat_MPIAIJ*)Gmat_2->data;
313     mpimat_1 = (Mat_MPIAIJ*)Gmat_1->data;
314     matA_1 = (Mat_SeqAIJ*)mpimat_1->A->data;
315     matB_1 = (Mat_SeqAIJ*)mpimat_1->B->data;
316     matA_2 = (Mat_SeqAIJ*)mpimat_2->A->data;
317     matB_2 = (Mat_SeqAIJ*)mpimat_2->B->data;
318 
319     /* force compressed row storage for B matrix in AuxMat */
320     matB_1->compressedrow.check = PETSC_TRUE;
321     ierr = MatCheckCompressedRow(mpimat_1->B,&matB_1->compressedrow,matB_1->i,Gmat_1->rmap->n,-1.0);
322     CHKERRQ(ierr);
323 
324     ierr = PetscMalloc( nloc*sizeof(PetscInt), &lid_cprowID_2 ); CHKERRQ(ierr);
325     ierr = PetscMalloc( nloc*sizeof(PetscInt), &lid_cprowID_1 ); CHKERRQ(ierr);
326     for(lid=0;lid<nloc;lid++) lid_cprowID_1[lid] = lid_cprowID_2[lid] = -1;
327     for (ix=0; ix<matB_1->compressedrow.nrows; ix++) {
328       PetscInt lid = matB_1->compressedrow.rindex[ix];
329       lid_cprowID_1[lid] = ix;
330     }
331     for (ix=0; ix<matB_2->compressedrow.nrows; ix++) {
332       PetscInt lid = matB_2->compressedrow.rindex[ix];
333       lid_cprowID_2[lid] = ix;
334     }
335   }
336   else {
337     matA_1 = (Mat_SeqAIJ*)Gmat_1->data;
338     matA_2 = (Mat_SeqAIJ*)Gmat_2->data;
339     lid_cprowID_2 = lid_cprowID_1 = 0;
340   }
341   assert( matA_1 && !matA_1->compressedrow.use );
342   assert( matB_1==0 || matB_1->compressedrow.use );
343   assert( matA_2 && !matA_2->compressedrow.use );
344   assert( matB_2==0 || matB_2->compressedrow.use );
345 
346   /* get state of locals and selected gid for deleted */
347   ierr = PetscMalloc( nloc*sizeof(NState), &lid_state ); CHKERRQ(ierr);
348   ierr = PetscMalloc( nloc*sizeof(PetscScalar), &deleted_parent_gid ); CHKERRQ(ierr);
349   for( lid = 0 ; lid < nloc ; lid++ ) {
350     deleted_parent_gid[lid] = -1.0;
351     lid_state[lid] = DELETED;
352     ii = matA_2->i; n = ii[lid+1] - ii[lid];
353     if( n < 2 ) {
354       if(!lid_cprowID_2 || (ix=lid_cprowID_2[lid])==-1 || (matB_2->compressedrow.i[ix+1]-matB_2->compressedrow.i[ix])==0){
355         lid_state[lid] = REMOVED;
356       }
357     }
358   }
359   /* set index into compressed row 'lid_cprowID', not -1 means its a boundary node */
360   {
361     const PetscInt *selected_idx;
362     /* set local selected */
363     ierr = ISGetSize( selected_2, &nSelected );        CHKERRQ(ierr);
364     ierr = ISGetIndices( selected_2, &selected_idx );       CHKERRQ(ierr);
365     for(kk=0;kk<nSelected;kk++){
366       PetscInt lid = selected_idx[kk];
367       if(lid<nloc) lid_state[lid] = (NState)(lid+my0); /* selected flag */
368       else break;
369     }
370     ierr = ISRestoreIndices( selected_2, &selected_idx );     CHKERRQ(ierr);
371   }
372   /* map local to selected local, -1 means a ghost owns it */
373   for(lid=kk=0;lid<nloc;lid++){
374     NState state = lid_state[lid];
375     if( IS_SELECTED(state) ){
376       PetscInt flid = lid;
377       do{
378         if(flid<nloc){
379           deleted_parent_gid[flid] = (PetscScalar)(lid + my0);
380         }
381         kk++;
382       } while( (flid=id_llist_2[flid]) != -1 );
383     }
384   }
385   /* get 'cpcol_1_state', 'cpcol_2_state' - uses mpimat_1->lvec & mpimat_2->lvec for temp space */
386   if (isMPI) {
387     Vec          tempVec;
388 
389     /* get 'cpcol_1_state' */
390     ierr = MatGetVecs( Gmat_1, &tempVec, 0 );         CHKERRQ(ierr);
391     for(kk=0,j=my0;kk<nloc;kk++,j++){
392       PetscScalar v = (PetscScalar)lid_state[kk];
393       ierr = VecSetValues( tempVec, 1, &j, &v, INSERT_VALUES );  CHKERRQ(ierr);
394     }
395     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
396     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
397     ierr = VecScatterBegin(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD);
398     CHKERRQ(ierr);
399     ierr =   VecScatterEnd(mpimat_1->Mvctx,tempVec, mpimat_1->lvec,INSERT_VALUES,SCATTER_FORWARD);
400     CHKERRQ(ierr);
401     ierr = VecGetArray( mpimat_1->lvec, &cpcol_1_state ); CHKERRQ(ierr);
402     ierr = VecDestroy( &tempVec ); CHKERRQ(ierr);
403 
404     /* get 'cpcol_2_state' */
405     ierr = MatGetVecs( Gmat_2, &tempVec, 0 );         CHKERRQ(ierr);
406     for(kk=0,j=my0;kk<nloc;kk++,j++){
407       PetscScalar v = (PetscScalar)lid_state[kk];
408       ierr = VecSetValues( tempVec, 1, &j, &v, INSERT_VALUES );  CHKERRQ(ierr);
409     }
410     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
411     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
412     ierr = VecScatterBegin(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
413     CHKERRQ(ierr);
414     ierr =   VecScatterEnd(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
415     CHKERRQ(ierr);
416     ierr = VecGetArray( mpimat_2->lvec, &cpcol_2_state ); CHKERRQ(ierr);
417     ierr = VecDestroy( &tempVec ); CHKERRQ(ierr);
418   } /* ismpi */
419 
420   /* doit */
421   for(lid=0;lid<nloc;lid++){
422     NState state = lid_state[lid];
423     if( IS_SELECTED(state) ) {      /* steal locals */
424       ii = matA_1->i; n = ii[lid+1] - ii[lid];
425       idx = matA_1->j + ii[lid];
426       for (j=0; j<n; j++) {
427         PetscInt flid, lidj = idx[j], sgid;
428         NState statej = lid_state[lidj];
429         if (statej==DELETED && (sgid=(PetscInt)PetscRealPart(deleted_parent_gid[lidj])) != lid+my0) { /* steal local */
430           deleted_parent_gid[lidj] = (PetscScalar)(lid+my0); /* send this with _2 */
431           if( sgid >= my0 && sgid < my0+nloc ){       /* I'm stealing this local from a local */
432             PetscInt hav=0, flid2=sgid-my0, lastid;
433             /* looking for local from local so id_llist_2 works */
434             for( lastid=flid2, flid=id_llist_2[flid2] ; flid!=-1 ; flid=id_llist_2[flid] ) {
435               if( flid == lidj ) {
436                 id_llist_2[lastid] = id_llist_2[flid];                    /* remove lidj from list */
437                 id_llist_2[flid] = id_llist_2[lid]; id_llist_2[lid] = flid; /* insert 'lidj' into head of llist */
438                 hav++;
439                 break;
440               }
441               lastid = flid;
442             }
443             if(hav!=1){
444               if(hav==0)SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
445               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"found node %d times???",hav);
446             }
447           }
448           else{            /* I'm stealing this local, owned by a ghost - ok to use _2, local */
449             assert(sgid==-1);
450             id_llist_2[lidj] = id_llist_2[lid]; id_llist_2[lid] = lidj; /* insert 'lidj' into head of llist */
451             /* local remove at end, off add/rm at end */
452           }
453         }
454       }
455     }
456     else if( state == DELETED && lid_cprowID_1 ) {
457       PetscInt sgidold = (PetscInt)PetscRealPart(deleted_parent_gid[lid]);
458       /* see if I have a selected ghost neighbor that will steal me */
459       if( (ix=lid_cprowID_1[lid]) != -1 ){
460         ii = matB_1->compressedrow.i; n = ii[ix+1] - ii[ix];
461         idx = matB_1->j + ii[ix];
462         for( j=0 ; j<n ; j++ ) {
463           PetscInt cpid = idx[j];
464           NState statej = (NState)PetscRealPart(cpcol_1_state[cpid]);
465           if( IS_SELECTED(statej) && sgidold != (PetscInt)statej ) { /* ghost will steal this, remove from my list */
466             deleted_parent_gid[lid] = (PetscScalar)statej; /* send who selected with _2 */
467             if( sgidold>=my0 && sgidold<(my0+nloc) ) { /* this was mine */
468               PetscInt lastid,hav=0,flid,oldslidj=sgidold-my0;
469               /* remove from 'oldslidj' list, local so _2 is OK */
470               for( lastid=oldslidj, flid=id_llist_2[oldslidj] ; flid != -1 ; flid=id_llist_2[flid] ) {
471                 if( flid == lid ) {
472                   id_llist_2[lastid] = id_llist_2[flid];   /* remove lid from oldslidj list */
473                   hav++;
474                   break;
475                 }
476                 lastid = flid;
477               }
478               if(hav!=1){
479                 if(hav==0)SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"failed to find adj in 'selected' lists - structurally unsymmetric matrix");
480                 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"found node %d times???",hav);
481               }
482               id_llist_2[lid] = -1; /* terminate linked list - needed? */
483             }
484             else assert(id_llist_2[lid] == -1);
485           }
486         }
487       }
488     } /* selected/deleted */
489     else assert(state == REMOVED || !lid_cprowID_1);
490   } /* node loop */
491 
492   if( isMPI ) {
493     PetscScalar *cpcol_2_sel_gid;
494     Vec          tempVec;
495     PetscInt     cpid;
496 
497     ierr = VecRestoreArray( mpimat_1->lvec, &cpcol_1_state ); CHKERRQ(ierr);
498     ierr = VecRestoreArray( mpimat_2->lvec, &cpcol_2_state ); CHKERRQ(ierr);
499 
500     /* get 'cpcol_2_sel_gid' */
501     ierr = MatGetVecs( Gmat_2, &tempVec, 0 );         CHKERRQ(ierr);
502     for(kk=0,j=my0;kk<nloc;kk++,j++){
503       ierr = VecSetValues( tempVec, 1, &j, &deleted_parent_gid[kk], INSERT_VALUES );  CHKERRQ(ierr);
504     }
505     ierr = VecAssemblyBegin( tempVec ); CHKERRQ(ierr);
506     ierr = VecAssemblyEnd( tempVec ); CHKERRQ(ierr);
507     ierr = VecScatterBegin(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
508     CHKERRQ(ierr);
509     ierr =   VecScatterEnd(mpimat_2->Mvctx,tempVec, mpimat_2->lvec,INSERT_VALUES,SCATTER_FORWARD);
510     CHKERRQ(ierr);
511     ierr = VecDestroy( &tempVec ); CHKERRQ(ierr);
512 
513     ierr = VecGetArray( mpimat_2->lvec, &cpcol_2_sel_gid ); CHKERRQ(ierr);
514 
515     /* look for deleted ghosts and see if they moved */
516     for(lid=0;lid<nloc;lid++){
517       NState state = lid_state[lid];
518       if( IS_SELECTED(state) ){
519         PetscInt flid,lastid,old_sgid=lid+my0;
520         /* look for deleted ghosts and see if they moved */
521         for( lastid=lid, flid=id_llist_2[lid] ; flid!=-1 ; flid=id_llist_2[flid] ) {
522           if( flid>=nloc ) {
523             PetscInt cpid = flid-nloc, sgid_new = (PetscInt)PetscRealPart(cpcol_2_sel_gid[cpid]);
524             if( sgid_new != old_sgid && sgid_new != -1 ) {
525               id_llist_2[lastid] = id_llist_2[flid];                    /* remove 'flid' from list */
526               id_llist_2[flid] = -1;
527               flid = lastid;
528             } /* if it changed parents */
529             else lastid = flid;
530           } /* for ghost nodes */
531           else lastid = flid;
532         } /* loop over list of deleted */
533       } /* selected */
534     }
535 
536     /* look at ghosts, see if they changed, and moved here */
537     for(cpid=0;cpid<nnodes_2-nloc;cpid++){
538       PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_sel_gid[cpid]);
539       if( sgid_new>=my0 && sgid_new<(my0+nloc) ) { /* this is mine */
540         PetscInt lastid,flid,slid_new=sgid_new-my0,flidj=nloc+cpid,hav=0;
541         for( lastid=slid_new, flid=id_llist_2[slid_new] ; flid != -1 ; flid=id_llist_2[flid] ) {
542           if( flid == flidj ) {
543             hav++;
544             break;
545           }
546           lastid = flid;
547         }
548         if( hav != 1 ){
549           assert(id_llist_2[flidj] == -1);
550           id_llist_2[flidj] = id_llist_2[slid_new]; id_llist_2[slid_new] = flidj; /* insert 'flidj' into head of llist */
551         }
552       }
553     }
554 
555     ierr = VecRestoreArray( mpimat_2->lvec, &cpcol_2_sel_gid ); CHKERRQ(ierr);
556     ierr = PetscFree( lid_cprowID_1 );  CHKERRQ(ierr);
557     ierr = PetscFree( lid_cprowID_2 );  CHKERRQ(ierr);
558   }
559 
560   /* copy out new aggs */
561   ierr = ISGeneralSetIndices(llist_aggs_2, nnodes_2, id_llist_2, PETSC_COPY_VALUES ); CHKERRQ(ierr);
562 
563   ierr = PetscFree( id_llist_2 );  CHKERRQ(ierr);
564   ierr = PetscFree( deleted_parent_gid );  CHKERRQ(ierr);
565   ierr = PetscFree( lid_state );  CHKERRQ(ierr);
566 
567   PetscFunctionReturn(0);
568 }
569 
570 /* -------------------------------------------------------------------------- */
571 /*
572    PCSetData_AGG
573 
574   Input Parameter:
575    . pc -
576 */
577 #undef __FUNCT__
578 #define __FUNCT__ "PCSetData_AGG"
579 PetscErrorCode PCSetData_AGG( PC pc )
580 {
581   PetscErrorCode  ierr;
582   PetscFunctionBegin;
583   ierr = PCSetCoordinates_AGG( pc, -1, PETSC_NULL ); CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 /* -------------------------------------------------------------------------- */
588 /*
589  formProl0
590 
591    Input Parameter:
592    . selected - list of selected local ID, includes selected ghosts
593    . locals_llist - linked list with aggregates
594    . bs - block size
595    . nSAvec - num columns of new P
596    . my0crs - global index of locals
597    . data_stride - bs*(nloc nodes + ghost nodes)
598    . data_in[data_stride*nSAvec] - local data on fine grid
599    . flid_fgid[data_stride/bs] - make local to global IDs, includes ghosts in 'locals_llist'
600   Output Parameter:
601    . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data
602    . a_Prol - prolongation operator
603 */
604 #undef __FUNCT__
605 #define __FUNCT__ "formProl0"
606 PetscErrorCode formProl0( IS selected, /* list of selected local ID, includes selected ghosts */
607                           IS locals_llist, /* linked list from selected vertices of aggregate unselected vertices */
608                           const PetscInt bs,
609                           const PetscInt nSAvec,
610                           const PetscInt my0crs,
611                           const PetscInt data_stride,
612                           PetscReal data_in[],
613                           const PetscInt flid_fgid[],
614                           PetscReal **a_data_out,
615                           Mat a_Prol /* prolongation operator (output)*/
616                           )
617 {
618   PetscErrorCode ierr;
619   PetscInt  Istart,Iend,nFineLoc,clid,flid,aggID,kk,jj,ii,nLocalSelected,ndone,nSelected,minsz;
620   MPI_Comm       wcomm = ((PetscObject)a_Prol)->comm;
621   PetscMPIInt    mype, npe;
622   const PetscInt *selected_idx,*llist_idx;
623   PetscReal      *out_data;
624 
625   PetscFunctionBegin;
626   ierr = MPI_Comm_rank(wcomm,&mype);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(wcomm,&npe);CHKERRQ(ierr);
628   ierr = MatGetOwnershipRange( a_Prol, &Istart, &Iend );    CHKERRQ(ierr);
629   nFineLoc = (Iend-Istart)/bs; assert((Iend-Istart)%bs==0);
630 
631   ierr = ISGetSize( selected, &nSelected );        CHKERRQ(ierr);
632   ierr = ISGetIndices( selected, &selected_idx );       CHKERRQ(ierr);
633   for(kk=0,nLocalSelected=0;kk<nSelected;kk++){
634     PetscInt lid = selected_idx[kk];
635     if(lid<nFineLoc) nLocalSelected++;
636   }
637 
638   /* aloc space for coarse point data (output) */
639 #define DATA_OUT_STRIDE (nLocalSelected*nSAvec)
640   ierr = PetscMalloc( DATA_OUT_STRIDE*nSAvec*sizeof(PetscReal), &out_data ); CHKERRQ(ierr);
641   for(ii=0;ii<DATA_OUT_STRIDE*nSAvec;ii++) out_data[ii]=1.e300;
642   *a_data_out = out_data; /* output - stride nLocalSelected*nSAvec */
643 
644   /* find points and set prolongation */
645   minsz = 100;
646   ndone = 0;
647   ierr = ISGetIndices( locals_llist, &llist_idx );      CHKERRQ(ierr);
648   for( clid = 0 ; clid < nLocalSelected ; clid++ ){
649     PetscInt cgid = my0crs + clid, cids[100];
650 
651     /* count agg */
652     aggID = 0;
653     flid = selected_idx[clid]; assert(flid != -1);
654     do{
655       aggID++;
656     } while( (flid=llist_idx[flid]) != -1 );
657     if( aggID<minsz ) minsz = aggID;
658 
659     /* get block */
660     {
661       PetscBLASInt   asz=aggID,M=asz*bs,N=nSAvec,INFO;
662       PetscBLASInt   Mdata=M+((N-M>0)?N-M:0),LDA=Mdata,LWORK=N*bs;
663       PetscScalar    *qqc,*qqr,*TAU,*WORK;
664       PetscInt       *fids;
665 
666       ierr = PetscMalloc( (Mdata*N)*sizeof(PetscScalar), &qqc ); CHKERRQ(ierr);
667       ierr = PetscMalloc( (M*N)*sizeof(PetscScalar), &qqr ); CHKERRQ(ierr);
668       ierr = PetscMalloc( N*sizeof(PetscScalar), &TAU ); CHKERRQ(ierr);
669       ierr = PetscMalloc( LWORK*sizeof(PetscScalar), &WORK ); CHKERRQ(ierr);
670       ierr = PetscMalloc( M*sizeof(PetscInt), &fids ); CHKERRQ(ierr);
671 
672       flid = selected_idx[clid];
673       aggID = 0;
674       do{
675         /* copy in B_i matrix - column oriented */
676         PetscReal *data = &data_in[flid*bs];
677         for( kk = ii = 0; ii < bs ; ii++ ) {
678           for( jj = 0; jj < N ; jj++ ) {
679             qqc[jj*Mdata + aggID*bs + ii] = data[jj*data_stride + ii];
680           }
681         }
682 
683         /* set fine IDs */
684         for(kk=0;kk<bs;kk++) fids[aggID*bs + kk] = flid_fgid[flid]*bs + kk;
685 
686         aggID++;
687       }while( (flid=llist_idx[flid]) != -1 );
688 
689       /* pad with zeros */
690       for( ii = asz*bs; ii < Mdata ; ii++ ) {
691 	for( jj = 0; jj < N ; jj++, kk++ ) {
692 	  qqc[jj*Mdata + ii] = .0;
693 	}
694       }
695 
696       ndone += aggID;
697       /* QR */
698       LAPACKgeqrf_( &Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
699       if( INFO != 0 ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRS error");
700       /* get R - column oriented - output B_{i+1} */
701       {
702         PetscReal *data = &out_data[clid*nSAvec];
703         for( jj = 0; jj < nSAvec ; jj++ ) {
704           for( ii = 0; ii < nSAvec ; ii++ ) {
705             assert(data[jj*DATA_OUT_STRIDE + ii] == 1.e300);
706             if( ii <= jj ) data[jj*DATA_OUT_STRIDE + ii] = PetscRealPart(qqc[jj*Mdata + ii]);
707 	    else data[jj*DATA_OUT_STRIDE + ii] = 0.;
708           }
709         }
710       }
711 
712       /* get Q - row oriented */
713       LAPACKungqr_( &Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
714       if( INFO != 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR error arg %d",-INFO);
715 
716       for( ii = 0 ; ii < M ; ii++ ){
717         for( jj = 0 ; jj < N ; jj++ ) {
718           qqr[N*ii + jj] = qqc[jj*Mdata + ii];
719         }
720       }
721 
722       /* add diagonal block of P0 */
723       for(kk=0;kk<N;kk++) {
724         cids[kk] = N*cgid + kk; /* global col IDs in P0 */
725       }
726       ierr = MatSetValues(a_Prol,M,fids,N,cids,qqr,INSERT_VALUES); CHKERRQ(ierr);
727 
728       ierr = PetscFree( qqc );  CHKERRQ(ierr);
729       ierr = PetscFree( qqr );  CHKERRQ(ierr);
730       ierr = PetscFree( TAU );  CHKERRQ(ierr);
731       ierr = PetscFree( WORK );  CHKERRQ(ierr);
732       ierr = PetscFree( fids );  CHKERRQ(ierr);
733     } /* scoping */
734   } /* for all coarse nodes */
735 
736 /* ierr = MPI_Allreduce( &ndone, &ii, 1, MPIU_INT, MPIU_SUM, wcomm ); */
737 /* MatGetSize( a_Prol, &kk, &jj ); */
738 /* ierr = MPI_Allreduce( &minsz, &jj, 1, MPIU_INT, MPIU_MIN, wcomm ); */
739 /* PetscPrintf(PETSC_COMM_WORLD," **** [%d]%s %d total done, N=%d (%d local done), min agg. size = %d\n",mype,__FUNCT__,ii,kk/bs,ndone,jj); */
740 
741   ierr = ISRestoreIndices( selected, &selected_idx );     CHKERRQ(ierr);
742   ierr = ISRestoreIndices( locals_llist, &llist_idx );     CHKERRQ(ierr);
743   ierr = MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
744   ierr = MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745 
746   PetscFunctionReturn(0);
747 }
748 
749 /* -------------------------------------------------------------------------- */
750 /*
751    PCGAMGgraph_AGG
752 
753   Input Parameter:
754    . pc - this
755    . Amat - matrix on this fine level
756   Output Parameter:
757    . a_Gmat -
758 */
759 #undef __FUNCT__
760 #define __FUNCT__ "PCGAMGgraph_AGG"
761 PetscErrorCode PCGAMGgraph_AGG( PC pc,
762                                 const Mat Amat,
763                                 Mat *a_Gmat
764                                 )
765 {
766   PetscErrorCode ierr;
767   PC_MG          *mg = (PC_MG*)pc->data;
768   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
769   const PetscInt verbose = pc_gamg->verbose;
770   const PetscReal vfilter = pc_gamg->threshold;
771   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
772   PetscMPIInt    mype,npe;
773   Mat            Gmat, Gmat2;
774   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
775 
776   PetscFunctionBegin;
777   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
778   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
779 
780   ierr  = createSimpleGraph( Amat, &Gmat ); CHKERRQ( ierr );
781 
782   ierr  = scaleFilterGraph( &Gmat, vfilter, pc_gamg_agg->sym_graph, verbose ); CHKERRQ( ierr );
783 
784   ierr = MatTransposeMatMult( Gmat, Gmat, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Gmat2 );
785   CHKERRQ(ierr);
786 
787   /* attach auxilary matrix */
788   pc_gamg_agg->aux_mat = Gmat;
789 
790   *a_Gmat = Gmat2;
791 
792   PetscFunctionReturn(0);
793 }
794 
795 /* -------------------------------------------------------------------------- */
796 /*
797    PCGAMGcoarsen_AGG
798 
799   Input Parameter:
800    . pc - this
801    . Gmat2 - matrix on this fine level
802   Output Parameter:
803    . a_selected - prolongation operator to the next level
804    . a_llist_parent - data of coarse grid points (num local columns in 'a_P_out')
805 */
806 #undef __FUNCT__
807 #define __FUNCT__ "PCGAMGcoarsen_AGG"
808 PetscErrorCode PCGAMGcoarsen_AGG( PC pc,
809                                   const Mat Gmat2,
810                                   IS *a_selected,
811                                   IS *a_llist_parent
812                                   )
813 {
814   PetscErrorCode ierr;
815   PC_MG          *mg = (PC_MG*)pc->data;
816   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
817   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
818   Mat             Gmat1; /* unsquared graph (not symetrized!) */
819   IS              perm, selected, llist_parent;
820   PetscInt        Ii,nloc,bs,n,m;
821   PetscInt *permute;
822   PetscBool *bIndexSet;
823   /* MPI_Comm        wcomm = ((PetscObject)Gmat2)->comm; */
824   /* PetscMPIInt     mype,npe; */
825 
826   PetscFunctionBegin;
827   /* ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr); */
828   /* ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr); */
829   ierr = MatGetLocalSize( Gmat2, &n, &m ); CHKERRQ(ierr);
830   ierr = MatGetBlockSize( Gmat2, &bs ); CHKERRQ(ierr); assert(bs==1);
831   nloc = n/bs;
832 
833   /* get unsquared graph */
834   Gmat1 = pc_gamg_agg->aux_mat; pc_gamg_agg->aux_mat = 0;
835 
836   /* get MIS aggs */
837   /* randomize */
838   ierr = PetscMalloc( nloc*sizeof(PetscInt), &permute ); CHKERRQ(ierr);
839   ierr = PetscMalloc( nloc*sizeof(PetscBool), &bIndexSet ); CHKERRQ(ierr);
840   for ( Ii = 0; Ii < nloc ; Ii++ ){
841     bIndexSet[Ii] = PETSC_FALSE;
842     permute[Ii] = Ii;
843   }
844   srand(1); /* make deterministic */
845   for ( Ii = 0; Ii < nloc ; Ii++ ) {
846     PetscInt iSwapIndex = rand()%nloc;
847     if (!bIndexSet[iSwapIndex] && iSwapIndex != Ii) {
848       PetscInt iTemp = permute[iSwapIndex];
849       permute[iSwapIndex] = permute[Ii];
850       permute[Ii] = iTemp;
851       bIndexSet[iSwapIndex] = PETSC_TRUE;
852     }
853   }
854   ierr = PetscFree( bIndexSet );  CHKERRQ(ierr);
855 
856   ierr = ISCreateGeneral(PETSC_COMM_SELF, nloc, permute, PETSC_USE_POINTER, &perm);
857   CHKERRQ(ierr);
858 
859   ierr = maxIndSetAgg( perm, Gmat2, PETSC_TRUE, pc_gamg->verbose, &selected, &llist_parent ); CHKERRQ(ierr);
860   ierr = ISDestroy( &perm );                    CHKERRQ(ierr);
861   ierr = PetscFree( permute );  CHKERRQ(ierr);
862 
863   /* smooth aggs */
864   ierr = smoothAggs( Gmat2, Gmat1, selected, llist_parent ); CHKERRQ(ierr);
865 
866   ierr = MatDestroy( &Gmat1 );  CHKERRQ(ierr);
867 
868   *a_selected = selected;
869   *a_llist_parent = llist_parent;
870 
871   PetscFunctionReturn(0);
872 }
873 
874 /* -------------------------------------------------------------------------- */
875 /*
876  PCGAMGprolongator_AGG
877 
878  Input Parameter:
879  . pc - this
880  . Amat - matrix on this fine level
881  . Graph - used to get ghost data for nodes in
882  . selected - [nselected inc. chosts]
883  . llist_parent - [nloc + Gmat.nghost] linked list
884  Output Parameter:
885  . a_P_out - prolongation operator to the next level
886  */
887 #undef __FUNCT__
888 #define __FUNCT__ "PCGAMGprolongator_AGG"
889 PetscErrorCode PCGAMGprolongator_AGG( PC pc,
890                                      const Mat Amat,
891                                      const Mat Gmat,
892                                      IS selected,
893                                      IS llist_parent,
894                                      Mat *a_P_out
895                                      )
896 {
897   PC_MG          *mg = (PC_MG*)pc->data;
898   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
899   const PetscInt verbose = pc_gamg->verbose;
900   const PetscInt data_cols = pc_gamg->data_cell_cols;
901   PetscErrorCode ierr;
902   PetscInt       Istart,Iend,nloc,ii,jj,kk,my0,nLocalSelected,bs;
903   Mat            Prol;
904   PetscMPIInt    mype, npe;
905   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
906   const PetscInt *selected_idx,col_bs=data_cols;
907   PetscReal      *data_w_ghost;
908   PetscInt       myCrs0, nbnodes=0, *flid_fgid;
909 
910   PetscFunctionBegin;
911   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
912   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
913   ierr = MatGetOwnershipRange( Amat, &Istart, &Iend ); CHKERRQ(ierr);
914   ierr  = MatGetBlockSize( Amat, &bs ); CHKERRQ( ierr );
915   nloc = (Iend-Istart)/bs; my0 = Istart/bs; assert((Iend-Istart)%bs==0);
916 
917   /* get 'nLocalSelected' */
918   ierr = ISGetSize( selected, &kk );        CHKERRQ(ierr);
919   ierr = ISGetIndices( selected, &selected_idx );     CHKERRQ(ierr);
920   for(ii=0,nLocalSelected=0;ii<kk;ii++){
921     PetscInt lid = selected_idx[ii];
922     if(lid<nloc) nLocalSelected++;
923   }
924   ierr = ISRestoreIndices( selected, &selected_idx );     CHKERRQ(ierr);
925 
926   /* create prolongator, create P matrix */
927   ierr = MatCreateMPIAIJ(wcomm,
928                          nloc*bs, nLocalSelected*col_bs,
929                          PETSC_DETERMINE, PETSC_DETERMINE,
930                          data_cols, PETSC_NULL, data_cols, PETSC_NULL,
931                          &Prol );
932   CHKERRQ(ierr);
933 
934   /* can get all points "removed" */
935   ierr =  MatGetSize( Prol, &kk, &ii ); CHKERRQ(ierr);
936   if( ii==0 ) {
937     if( verbose ) {
938       PetscPrintf(wcomm,"[%d]%s no selected points on coarse grid\n",mype,__FUNCT__);
939     }
940     ierr = MatDestroy( &Prol );  CHKERRQ(ierr);
941     *a_P_out = PETSC_NULL;  /* out */
942     PetscFunctionReturn(0);
943   }
944   if( verbose ) {
945     PetscPrintf(PETSC_COMM_WORLD,"\t\t[%d]%s New grid %d nodes\n",mype,__FUNCT__,ii/bs);
946   }
947   ierr = MatGetOwnershipRangeColumn( Prol, &myCrs0, &kk ); CHKERRQ(ierr);
948   myCrs0 = myCrs0/col_bs;
949 
950   /* create global vector of data in 'data_w_ghost' */
951 #if defined PETSC_USE_LOG
952   ierr = PetscLogEventBegin(gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr);
953 #endif
954   if (npe > 1) { /*  */
955     PetscReal *tmp_gdata,*tmp_ldata,*tp2;
956     ierr = PetscMalloc( nloc*sizeof(PetscReal), &tmp_ldata ); CHKERRQ(ierr);
957     for( jj = 0 ; jj < data_cols ; jj++ ){
958       for( kk = 0 ; kk < bs ; kk++) {
959         PetscInt ii,nnodes;
960         const PetscReal *tp = pc_gamg->data + jj*bs*nloc + kk;
961         for( ii = 0 ; ii < nloc ; ii++, tp += bs ){
962           tmp_ldata[ii] = *tp;
963         }
964         ierr = getDataWithGhosts( Gmat, 1, tmp_ldata, &nnodes, &tmp_gdata );
965         CHKERRQ(ierr);
966         if(jj==0 && kk==0) { /* now I know how many todal nodes - allocate */
967           ierr = PetscMalloc( nnodes*bs*data_cols*sizeof(PetscReal), &data_w_ghost ); CHKERRQ(ierr);
968           nbnodes = bs*nnodes;
969         }
970         tp2 = data_w_ghost + jj*bs*nnodes + kk;
971         for( ii = 0 ; ii < nnodes ; ii++, tp2 += bs ){
972           *tp2 = tmp_gdata[ii];
973         }
974         ierr = PetscFree( tmp_gdata ); CHKERRQ(ierr);
975       }
976     }
977     ierr = PetscFree( tmp_ldata ); CHKERRQ(ierr);
978   }
979   else {
980     nbnodes = bs*nloc;
981     data_w_ghost = (PetscReal*)pc_gamg->data;
982   }
983 
984   /* get P0 */
985   if( npe > 1 ){
986     PetscReal *fid_glid_loc,*fiddata;
987     PetscInt nnodes;
988 
989     ierr = PetscMalloc( nloc*sizeof(PetscReal), &fid_glid_loc ); CHKERRQ(ierr);
990     for(kk=0;kk<nloc;kk++) fid_glid_loc[kk] = (PetscReal)(my0+kk);
991     ierr = getDataWithGhosts(Gmat, 1, fid_glid_loc, &nnodes, &fiddata);
992     CHKERRQ(ierr);
993     ierr = PetscMalloc( nnodes*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr);
994     for(kk=0;kk<nnodes;kk++) flid_fgid[kk] = (PetscInt)fiddata[kk];
995     ierr = PetscFree( fiddata ); CHKERRQ(ierr);
996     assert(nnodes==nbnodes/bs);
997     ierr = PetscFree( fid_glid_loc ); CHKERRQ(ierr);
998   }
999   else {
1000     ierr = PetscMalloc( nloc*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr);
1001     for(kk=0;kk<nloc;kk++) flid_fgid[kk] = my0 + kk;
1002   }
1003 #if defined PETSC_USE_LOG
1004   ierr = PetscLogEventEnd(gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr);
1005   ierr = PetscLogEventBegin(gamg_setup_events[SET8],0,0,0,0);CHKERRQ(ierr);
1006 #endif
1007   {
1008     PetscReal *data_out;
1009     ierr = formProl0( selected, llist_parent, bs, data_cols, myCrs0, nbnodes,
1010                       data_w_ghost, flid_fgid, &data_out, Prol );
1011     CHKERRQ(ierr);
1012     ierr = PetscFree( pc_gamg->data ); CHKERRQ( ierr );
1013     pc_gamg->data = data_out;
1014     pc_gamg->data_cell_rows = data_cols;
1015     pc_gamg->data_sz = data_cols*data_cols*nLocalSelected;
1016   }
1017  #if defined PETSC_USE_LOG
1018   ierr = PetscLogEventEnd(gamg_setup_events[SET8],0,0,0,0);CHKERRQ(ierr);
1019 #endif
1020   if (npe > 1) ierr = PetscFree( data_w_ghost );      CHKERRQ(ierr);
1021   ierr = PetscFree( flid_fgid ); CHKERRQ(ierr);
1022 
1023   /* attach block size of columns */
1024   if( pc_gamg->col_bs_id == -1 ) {
1025     ierr = PetscObjectComposedDataRegister( &pc_gamg->col_bs_id ); assert(pc_gamg->col_bs_id != -1 );
1026   }
1027   ierr = PetscObjectComposedDataSetInt( (PetscObject)Prol, pc_gamg->col_bs_id, data_cols ); CHKERRQ(ierr);
1028 
1029   *a_P_out = Prol;  /* out */
1030 
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 /* -------------------------------------------------------------------------- */
1035 /*
1036    PCGAMGoptprol_AGG
1037 
1038   Input Parameter:
1039    . pc - this
1040    . Amat - matrix on this fine level
1041  In/Output Parameter:
1042    . a_P_out - prolongation operator to the next level
1043 */
1044 #undef __FUNCT__
1045 #define __FUNCT__ "PCGAMGoptprol_AGG"
1046 PetscErrorCode PCGAMGoptprol_AGG( PC pc,
1047                                   const Mat Amat,
1048                                   Mat *a_P
1049                                   )
1050 {
1051   PetscErrorCode ierr;
1052   PC_MG          *mg = (PC_MG*)pc->data;
1053   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1054   const PetscInt verbose = pc_gamg->verbose;
1055   PC_GAMG_AGG    *pc_gamg_agg = (PC_GAMG_AGG*)pc_gamg->subctx;
1056   PetscInt       jj;
1057   PetscMPIInt    mype,npe;
1058   Mat            Prol = *a_P;
1059   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
1060 
1061   PetscFunctionBegin;
1062   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
1063   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
1064 
1065   /* smooth P0 */
1066   for( jj = 0 ; jj < pc_gamg_agg->nsmooths ; jj++ ){
1067     Mat tMat;
1068     Vec diag;
1069     PetscReal alpha, emax, emin;
1070 #if defined PETSC_USE_LOG
1071     ierr = PetscLogEventBegin(gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr);
1072 #endif
1073     if( jj == 0 ) {
1074       KSP eksp;
1075       Vec bb, xx;
1076       PC pc;
1077       ierr = MatGetVecs( Amat, &bb, 0 );         CHKERRQ(ierr);
1078       ierr = MatGetVecs( Amat, &xx, 0 );         CHKERRQ(ierr);
1079       {
1080         PetscRandom    rctx;
1081         ierr = PetscRandomCreate(wcomm,&rctx);CHKERRQ(ierr);
1082         ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
1083         ierr = VecSetRandom(bb,rctx);CHKERRQ(ierr);
1084         ierr = PetscRandomDestroy( &rctx ); CHKERRQ(ierr);
1085       }
1086       ierr = KSPCreate(wcomm,&eksp);                            CHKERRQ(ierr);
1087       ierr = KSPAppendOptionsPrefix( eksp, "est_");         CHKERRQ(ierr);
1088       ierr = KSPSetFromOptions( eksp );    CHKERRQ(ierr);
1089       ierr = KSPSetInitialGuessNonzero( eksp, PETSC_FALSE );    CHKERRQ(ierr);
1090       ierr = KSPSetOperators( eksp, Amat, Amat, SAME_NONZERO_PATTERN );
1091       CHKERRQ( ierr );
1092       ierr = KSPGetPC( eksp, &pc );                              CHKERRQ( ierr );
1093       ierr = PCSetType( pc, PCJACOBI ); CHKERRQ(ierr);  /* smoother */
1094       ierr = KSPSetTolerances(eksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,10);
1095       CHKERRQ(ierr);
1096       ierr = KSPSetNormType( eksp, KSP_NORM_NONE );                 CHKERRQ(ierr);
1097       ierr = KSPSetComputeSingularValues( eksp,PETSC_TRUE );        CHKERRQ(ierr);
1098 
1099       /* solve - keep stuff out of logging */
1100       ierr = PetscLogEventDeactivate(KSP_Solve);CHKERRQ(ierr);
1101       ierr = PetscLogEventDeactivate(PC_Apply);CHKERRQ(ierr);
1102       ierr = KSPSolve( eksp, bb, xx );                              CHKERRQ(ierr);
1103       ierr = PetscLogEventActivate(KSP_Solve);CHKERRQ(ierr);
1104       ierr = PetscLogEventActivate(PC_Apply);CHKERRQ(ierr);
1105 
1106       ierr = KSPComputeExtremeSingularValues( eksp, &emax, &emin ); CHKERRQ(ierr);
1107       if( verbose ) {
1108         PetscPrintf(wcomm,"\t\t\t%s smooth P0: max eigen=%e min=%e PC=%s\n",
1109                     __FUNCT__,emax,emin,PCJACOBI);
1110       }
1111       ierr = VecDestroy( &xx );       CHKERRQ(ierr);
1112       ierr = VecDestroy( &bb );       CHKERRQ(ierr);
1113       ierr = KSPDestroy( &eksp );     CHKERRQ(ierr);
1114 
1115       if( pc_gamg->emax_id == -1 ) {
1116         ierr = PetscObjectComposedDataRegister( &pc_gamg->emax_id );
1117         assert(pc_gamg->emax_id != -1 );
1118       }
1119       ierr = PetscObjectComposedDataSetScalar( (PetscObject)Amat, pc_gamg->emax_id, emax ); CHKERRQ(ierr);
1120     }
1121 
1122     /* smooth P1 := (I - omega/lam D^{-1}A)P0 */
1123     ierr = MatMatMult( Amat, Prol, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tMat );   CHKERRQ(ierr);
1124     ierr = MatGetVecs( Amat, &diag, 0 );    CHKERRQ(ierr);
1125     ierr = MatGetDiagonal( Amat, diag );    CHKERRQ(ierr); /* effectively PCJACOBI */
1126     ierr = VecReciprocal( diag );         CHKERRQ(ierr);
1127     ierr = MatDiagonalScale( tMat, diag, 0 ); CHKERRQ(ierr);
1128     ierr = VecDestroy( &diag );           CHKERRQ(ierr);
1129     alpha = -1.5/emax;
1130     ierr = MatAYPX( tMat, alpha, Prol, SUBSET_NONZERO_PATTERN );           CHKERRQ(ierr);
1131     ierr = MatDestroy( &Prol );  CHKERRQ(ierr);
1132     Prol = tMat;
1133 #if defined PETSC_USE_LOG
1134     ierr = PetscLogEventEnd(gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr);
1135 #endif
1136   }
1137 
1138   *a_P = Prol;
1139 
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 /* -------------------------------------------------------------------------- */
1144 /*
1145    PCCreateGAMG_AGG
1146 
1147   Input Parameter:
1148    . pc -
1149 */
1150 #undef __FUNCT__
1151 #define __FUNCT__ "PCCreateGAMG_AGG"
1152 PetscErrorCode  PCCreateGAMG_AGG( PC pc )
1153 {
1154   PetscErrorCode  ierr;
1155   PC_MG           *mg = (PC_MG*)pc->data;
1156   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
1157   PC_GAMG_AGG      *pc_gamg_agg;
1158 
1159   PetscFunctionBegin;
1160   /* create sub context for SA */
1161   ierr = PetscNewLog( pc, PC_GAMG_AGG, &pc_gamg_agg ); CHKERRQ(ierr);
1162   assert(!pc_gamg->subctx);
1163   pc_gamg->subctx = pc_gamg_agg;
1164 
1165   pc->ops->setfromoptions = PCSetFromOptions_GAMG_AGG;
1166   pc->ops->destroy        = PCDestroy_AGG;
1167   /* reset does not do anything; setup not virtual */
1168 
1169   /* set internal function pointers */
1170   pc_gamg->graph = PCGAMGgraph_AGG;
1171   pc_gamg->coarsen = PCGAMGcoarsen_AGG;
1172   pc_gamg->prolongator = PCGAMGprolongator_AGG;
1173   pc_gamg->optprol = PCGAMGoptprol_AGG;
1174 
1175   pc_gamg->createdefaultdata = PCSetData_AGG;
1176 
1177   ierr = PetscObjectComposeFunctionDynamic( (PetscObject)pc,
1178                                             "PCSetCoordinates_C",
1179                                             "PCSetCoordinates_AGG",
1180                                             PCSetCoordinates_AGG);
1181   PetscFunctionReturn(0);
1182 }
1183