xref: /petsc/src/ksp/pc/impls/gamg/agg.c (revision bf1d350efdc282becf9b0ffa034f3f1a853b9d50)
1 /*
2  GAMG geometric-algebric multiogrid PC - Mark Adams 2011
3  */
4 
5 #include <../src/ksp/pc/impls/gamg/gamg.h>        /*I "petscpc.h" I*/
6 #include <private/kspimpl.h>
7 
8 #include <assert.h>
9 #include <petscblaslapack.h>
10 
11 typedef struct {
12   PetscInt smooths;
13 } PC_GAMG_AGG;
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "PCGAMGSetNSmooths"
17 /*@
18    PCGAMGSetNSmooths - Set number of smoothing steps (1 is typical)
19 
20    Not Collective on PC
21 
22    Input Parameters:
23 .  pc - the preconditioner context
24 
25    Options Database Key:
26 .  -pc_gamg_agg_nsmooths
27 
28    Level: intermediate
29 
30    Concepts: Aggregation AMG preconditioner
31 
32 .seealso: ()
33 @*/
34 PetscErrorCode PCGAMGSetNSmooths(PC pc, PetscInt n)
35 {
36   PetscErrorCode ierr;
37 
38   PetscFunctionBegin;
39   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
40   ierr = PetscTryMethod(pc,"PCGAMGSetNSmooths_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "PCGAMGSetNSmooths_GAMG"
47 PetscErrorCode PCGAMGSetNSmooths_GAMG(PC pc, PetscInt n)
48 {
49   PC_MG           *mg = (PC_MG*)pc->data;
50   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
51   PC_GAMG_AGG      *pc_gamg_sa = (PC_GAMG_AGG*)pc_gamg->subctx;
52 
53   PetscFunctionBegin;
54   assert(n>=0);
55   pc_gamg_sa->smooths = n;
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 /* -------------------------------------------------------------------------- */
61 /*
62    PCSetFromOptions_GAMG_AGG
63 
64   Input Parameter:
65    . pc -
66 */
67 #undef __FUNCT__
68 #define __FUNCT__ "PCSetFromOptions_GAMG_AGG"
69 PetscErrorCode PCSetFromOptions_GAMG_AGG( PC pc )
70 {
71   PetscErrorCode  ierr;
72   PC_MG           *mg = (PC_MG*)pc->data;
73   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
74   PC_GAMG_AGG      *pc_gamg_sa = (PC_GAMG_AGG*)pc_gamg->subctx;
75   PetscBool        flag;
76 
77   PetscFunctionBegin;
78   /* call base class */
79   ierr = PCSetFromOptions_GAMG( pc ); CHKERRQ(ierr);
80 
81   ierr = PetscOptionsHead("GAMG-AGG options"); CHKERRQ(ierr);
82   {
83     /* -pc_gamg_agg_nsmooths */
84     pc_gamg_sa->smooths = 0;
85     ierr = PetscOptionsInt("-pc_gamg_agg_nsmooths",
86                            "smoothing steps for smoothed aggregation, usually 1 (0)",
87                            "PCGAMGSetNSmooths",
88                            pc_gamg_sa->smooths,
89                            &pc_gamg_sa->smooths,
90                            &flag);
91     CHKERRQ(ierr);
92   }
93   ierr = PetscOptionsTail();CHKERRQ(ierr);
94 
95   if( pc_gamg->verbose ) {
96     PetscPrintf(PETSC_COMM_WORLD,"[%d]%s done\n",0,__FUNCT__);
97   }
98 
99   PetscFunctionReturn(0);
100 }
101 
102 /* -------------------------------------------------------------------------- */
103 /*
104    PCDestroy_AGG
105 
106   Input Parameter:
107    . pc -
108 */
109 #undef __FUNCT__
110 #define __FUNCT__ "PCDestroy_AGG"
111 PetscErrorCode PCDestroy_AGG( PC pc )
112 {
113   PetscErrorCode  ierr;
114   PC_MG           *mg = (PC_MG*)pc->data;
115   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
116   PC_GAMG_AGG      *pc_gamg_sa = (PC_GAMG_AGG*)pc_gamg->subctx;
117 
118   PetscFunctionBegin;
119   if( pc_gamg_sa ) {
120     ierr = PetscFree(pc_gamg_sa);CHKERRQ(ierr);
121     pc_gamg_sa = 0;
122   }
123 
124   /* call base class */
125   ierr = PCDestroy_GAMG( pc );CHKERRQ(ierr);
126 
127   PetscFunctionReturn(0);
128 }
129 
130 /* -------------------------------------------------------------------------- */
131 /*
132    PCSetCoordinates_AGG
133 
134    Input Parameter:
135    .  pc - the preconditioner context
136 */
137 EXTERN_C_BEGIN
138 #undef __FUNCT__
139 #define __FUNCT__ "PCSetCoordinates_AGG"
140 PetscErrorCode PCSetCoordinates_AGG( PC pc, PetscInt ndm, PetscReal *coords )
141 {
142   PC_MG          *mg = (PC_MG*)pc->data;
143   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
144   PetscErrorCode ierr;
145   PetscInt       arrsz,bs,my0,kk,ii,jj,nloc,Iend;
146   Mat            Amat = pc->pmat;
147 
148   PetscFunctionBegin;
149   PetscValidHeaderSpecific( Amat, MAT_CLASSID, 1 );
150   ierr  = MatGetBlockSize( Amat, &bs );               CHKERRQ( ierr );
151   ierr  = MatGetOwnershipRange( Amat, &my0, &Iend ); CHKERRQ(ierr);
152   nloc = (Iend-my0)/bs;
153   if((Iend-my0)%bs!=0) SETERRQ1(((PetscObject)Amat)->comm,PETSC_ERR_ARG_WRONG, "Bad local size %d.",nloc);
154 
155   /* SA: null space vectors */
156   if( coords && bs==1 ) pc_gamg->data_cols = 1; /* scalar w/ coords and SA (not needed) */
157   else if( coords ) pc_gamg->data_cols = (ndm==2 ? 3 : 6); /* elasticity */
158   else pc_gamg->data_cols = bs; /* no data, force SA with constant null space vectors */
159   pc_gamg->data_rows = bs;
160 
161   arrsz = nloc*pc_gamg->data_rows*pc_gamg->data_cols;
162 
163   /* create data - syntactic sugar that should be refactored at some point */
164   if (pc_gamg->data==0 || (pc_gamg->data_sz != arrsz)) {
165     ierr = PetscFree( pc_gamg->data );  CHKERRQ(ierr);
166     ierr = PetscMalloc((arrsz+1)*sizeof(PetscReal), &pc_gamg->data ); CHKERRQ(ierr);
167   }
168   for(kk=0;kk<arrsz;kk++)pc_gamg->data[kk] = -999999999.;
169   pc_gamg->data[arrsz] = -99.;
170   /* copy data in - column oriented */
171   for(kk=0;kk<nloc;kk++){
172     const PetscInt M = Iend - my0;
173     PetscReal *data = &pc_gamg->data[kk*bs];
174     if( pc_gamg->data_cols==1 ) *data = 1.0;
175     else {
176       for(ii=0;ii<bs;ii++)
177         for(jj=0;jj<bs;jj++)
178           if(ii==jj)data[ii*M + jj] = 1.0; /* translational modes */
179           else data[ii*M + jj] = 0.0;
180       if( coords ) {
181         if( ndm == 2 ){ /* rotational modes */
182           data += 2*M;
183           data[0] = -coords[2*kk+1];
184           data[1] =  coords[2*kk];
185         }
186         else {
187           data += 3*M;
188           data[0] = 0.0;               data[M+0] =  coords[3*kk+2]; data[2*M+0] = -coords[3*kk+1];
189           data[1] = -coords[3*kk+2]; data[M+1] = 0.0;               data[2*M+1] =  coords[3*kk];
190           data[2] =  coords[3*kk+1]; data[M+2] = -coords[3*kk];   data[2*M+2] = 0.0;
191         }
192       }
193     }
194   }
195   assert(pc_gamg->data[arrsz] == -99.);
196 
197   pc_gamg->data_sz = arrsz;
198 
199   PetscFunctionReturn(0);
200 }
201 EXTERN_C_END
202 
203 
204 /* -------------------------------------------------------------------------- */
205 /*
206    PCSetData_AGG
207 
208   Input Parameter:
209    . pc -
210 */
211 #undef __FUNCT__
212 #define __FUNCT__ "PCSetData_AGG"
213 PetscErrorCode PCSetData_AGG( PC pc )
214 {
215   PetscErrorCode  ierr;
216   PetscFunctionBegin;
217   ierr = PCSetCoordinates_AGG( pc, -1, PETSC_NULL ); CHKERRQ(ierr);
218   PetscFunctionReturn(0);
219 }
220 
221 /* -------------------------------------------------------------------------- */
222 /*
223    PCCreateGAMG_AGG
224 
225   Input Parameter:
226    . pc -
227 */
228 #undef __FUNCT__
229 #define __FUNCT__ "PCCreateGAMG_AGG"
230 PetscErrorCode  PCCreateGAMG_AGG( PC pc )
231 {
232   PetscErrorCode  ierr;
233   PC_MG           *mg = (PC_MG*)pc->data;
234   PC_GAMG         *pc_gamg = (PC_GAMG*)mg->innerctx;
235   PC_GAMG_AGG      *pc_gamg_sa;
236 
237   PetscFunctionBegin;
238   /* create sub context for SA */
239   ierr = PetscNewLog( pc, PC_GAMG_AGG, &pc_gamg_sa ); CHKERRQ(ierr);
240   assert(!pc_gamg->subctx);
241   pc_gamg->subctx = pc_gamg_sa;
242 
243   pc->ops->setfromoptions = PCSetFromOptions_GAMG_AGG;
244   pc->ops->destroy        = PCDestroy_AGG;
245   /* reset does not do anything; setup not virtual */
246 
247   /* set internal function pointers */
248   pc_gamg->createprolongator = PCGAMGcreateProl_AGG;
249   pc_gamg->createdefaultdata = PCSetData_AGG;
250 
251   ierr = PetscObjectComposeFunctionDynamic( (PetscObject)pc,
252                                             "PCSetCoordinates_C",
253                                             "PCSetCoordinates_AGG",
254                                             PCSetCoordinates_AGG);
255   PetscFunctionReturn(0);
256 }
257 
258 /* -------------------------------------------------------------------------- */
259 /*
260  formProl0
261 
262    Input Parameter:
263    . selected - list of selected local ID, includes selected ghosts
264    . locals_llist - linked list with aggregates
265    . bs - block size
266    . nSAvec - num columns of new P
267    . my0crs - global index of locals
268    . data_stride - bs*(nloc nodes + ghost nodes)
269    . data_in[data_stride*nSAvec] - local data on fine grid
270    . flid_fgid[data_stride/bs] - make local to global IDs, includes ghosts in 'locals_llist'
271   Output Parameter:
272    . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data
273    . a_Prol - prolongation operator
274 */
275 #undef __FUNCT__
276 #define __FUNCT__ "formProl0"
277 PetscErrorCode formProl0(IS selected, /* list of selected local ID, includes selected ghosts */
278                          IS locals_llist, /* linked list from selected vertices of aggregate unselected vertices */
279                          const PetscInt bs,
280                          const PetscInt nSAvec,
281                          const PetscInt my0crs,
282 			 const PetscInt data_stride,
283 			 PetscReal data_in[],
284                          const PetscInt flid_fgid[],
285                          PetscReal **a_data_out,
286                          Mat a_Prol /* prolongation operator (output)*/
287                          )
288 {
289   PetscErrorCode ierr;
290   PetscInt  Istart,Iend,nFineLoc,clid,flid,aggID,kk,jj,ii,nLocalSelected,ndone,nSelected;
291   MPI_Comm       wcomm = ((PetscObject)a_Prol)->comm;
292   PetscMPIInt    mype, npe;
293   const PetscInt *selected_idx,*llist_idx;
294   PetscReal      *out_data;
295 
296   PetscFunctionBegin;
297   ierr = MPI_Comm_rank(wcomm,&mype);CHKERRQ(ierr);
298   ierr = MPI_Comm_size(wcomm,&npe);CHKERRQ(ierr);
299   ierr = MatGetOwnershipRange( a_Prol, &Istart, &Iend );    CHKERRQ(ierr);
300   nFineLoc = (Iend-Istart)/bs; assert((Iend-Istart)%bs==0);
301 
302   ierr = ISGetLocalSize( selected, &nSelected );        CHKERRQ(ierr);
303   ierr = ISGetIndices( selected, &selected_idx );       CHKERRQ(ierr);
304   for(kk=0,nLocalSelected=0;kk<nSelected;kk++){
305     PetscInt lid = selected_idx[kk];
306     if(lid<nFineLoc) nLocalSelected++;
307   }
308 
309   /* aloc space for coarse point data (output) */
310 #define DATA_OUT_STRIDE (nLocalSelected*nSAvec)
311   ierr = PetscMalloc( (DATA_OUT_STRIDE*nSAvec+1)*sizeof(PetscReal), &out_data ); CHKERRQ(ierr);
312   for(ii=0;ii<DATA_OUT_STRIDE*nSAvec+1;ii++) out_data[ii]=1.e300;
313   *a_data_out = out_data; /* output - stride nLocalSelected*nSAvec */
314 
315   /* find points and set prolongation */
316   ndone = 0;
317   ierr = ISGetIndices( locals_llist, &llist_idx );      CHKERRQ(ierr);
318   for( clid = 0 ; clid < nLocalSelected ; clid++ ){
319     PetscInt cgid = my0crs + clid, cids[100];
320 
321     /* count agg */
322     aggID = 0;
323     flid = selected_idx[clid]; assert(flid != -1);
324     do{
325       aggID++;
326     } while( (flid=llist_idx[flid]) != -1 );
327 
328     /* get block */
329     {
330       PetscBLASInt   asz=aggID,M=asz*bs,N=nSAvec,INFO;
331       PetscBLASInt   Mdata=M+((N-M>0)?N-M:0),LDA=Mdata,LWORK=N*bs;
332       PetscScalar    *qqc,*qqr,*TAU,*WORK;
333       PetscInt       *fids;
334 
335       ierr = PetscMalloc( (Mdata*N)*sizeof(PetscScalar), &qqc ); CHKERRQ(ierr);
336       ierr = PetscMalloc( (M*N)*sizeof(PetscScalar), &qqr ); CHKERRQ(ierr);
337       ierr = PetscMalloc( N*sizeof(PetscScalar), &TAU ); CHKERRQ(ierr);
338       ierr = PetscMalloc( LWORK*sizeof(PetscScalar), &WORK ); CHKERRQ(ierr);
339       ierr = PetscMalloc( M*sizeof(PetscInt), &fids ); CHKERRQ(ierr);
340 
341       flid = selected_idx[clid];
342       aggID = 0;
343       do{
344         /* copy in B_i matrix - column oriented */
345         PetscReal *data = &data_in[flid*bs];
346         for( kk = ii = 0; ii < bs ; ii++ ) {
347           for( jj = 0; jj < N ; jj++ ) {
348             qqc[jj*Mdata + aggID*bs + ii] = data[jj*data_stride + ii];
349           }
350         }
351 
352         /* set fine IDs */
353         for(kk=0;kk<bs;kk++) fids[aggID*bs + kk] = flid_fgid[flid]*bs + kk;
354 
355         aggID++;
356       }while( (flid=llist_idx[flid]) != -1 );
357 
358       /* pad with zeros */
359       for( ii = asz*bs; ii < Mdata ; ii++ ) {
360 	for( jj = 0; jj < N ; jj++, kk++ ) {
361 	  qqc[jj*Mdata + ii] = .0;
362 	}
363       }
364 
365       ndone += aggID;
366       /* QR */
367       LAPACKgeqrf_( &Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
368       if( INFO != 0 ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRS error");
369       /* get R - column oriented - output B_{i+1} */
370       {
371         PetscReal *data = &out_data[clid*nSAvec];
372         for( jj = 0; jj < nSAvec ; jj++ ) {
373           for( ii = 0; ii < nSAvec ; ii++ ) {
374             assert(data[jj*DATA_OUT_STRIDE + ii] == 1.e300);
375             if( ii <= jj ) data[jj*DATA_OUT_STRIDE + ii] = PetscRealPart(qqc[jj*Mdata + ii]);
376 	    else data[jj*DATA_OUT_STRIDE + ii] = 0.;
377           }
378         }
379       }
380 
381       /* get Q - row oriented */
382       LAPACKungqr_( &Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO );
383       if( INFO != 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR error arg %d",-INFO);
384 
385       for( ii = 0 ; ii < M ; ii++ ){
386         for( jj = 0 ; jj < N ; jj++ ) {
387           qqr[N*ii + jj] = qqc[jj*Mdata + ii];
388         }
389       }
390 
391       /* add diagonal block of P0 */
392       for(kk=0;kk<N;kk++) cids[kk] = N*cgid + kk; /* global col IDs in P0 */
393       ierr = MatSetValues(a_Prol,M,fids,N,cids,qqr,INSERT_VALUES); CHKERRQ(ierr);
394 
395       ierr = PetscFree( qqc );  CHKERRQ(ierr);
396       ierr = PetscFree( qqr );  CHKERRQ(ierr);
397       ierr = PetscFree( TAU );  CHKERRQ(ierr);
398       ierr = PetscFree( WORK );  CHKERRQ(ierr);
399       ierr = PetscFree( fids );  CHKERRQ(ierr);
400     } /* scoping */
401   } /* for all coarse nodes */
402   assert(out_data[nSAvec*DATA_OUT_STRIDE]==1.e300);
403 
404 /* ierr = MPI_Allreduce( &ndone, &ii, 1, MPIU_INT, MPIU_SUM, wcomm ); /\* synchronous version *\/ */
405 /* MatGetSize( a_Prol, &kk, &jj ); */
406 /* PetscPrintf(PETSC_COMM_WORLD," **** [%d]%s %d total done, N=%d (%d local done)\n",mype,__FUNCT__,ii,kk/bs,ndone); */
407 
408   ierr = ISRestoreIndices( selected, &selected_idx );     CHKERRQ(ierr);
409   ierr = ISRestoreIndices( locals_llist, &llist_idx );     CHKERRQ(ierr);
410   ierr = MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
411   ierr = MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
412 
413   PetscFunctionReturn(0);
414 }
415 
416 /* -------------------------------------------------------------------------- */
417 /*
418    PCGAMGcreateProl_AGG
419 
420   Input Parameter:
421    . pc - this
422    . Amat - matrix on this fine level
423    . data[nloc*data_sz(in)]
424   Output Parameter:
425    . a_P_out - prolongation operator to the next level
426    . a_data_out - data of coarse grid points (num local columns in 'a_P_out')
427 */
428 #undef __FUNCT__
429 #define __FUNCT__ "PCGAMGcreateProl_AGG"
430 PetscErrorCode PCGAMGcreateProl_AGG( PC pc,
431                                     const Mat Amat,
432                                     const PetscReal data[],
433                                     Mat *a_P_out,
434                                     PetscReal **a_data_out
435                                    )
436 {
437   PC_MG          *mg = (PC_MG*)pc->data;
438   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
439   PC_GAMG_AGG    *pc_gamg_sa = (PC_GAMG_AGG*)pc_gamg->subctx;
440   const PetscInt verbose = pc_gamg->verbose;
441   const PetscInt data_cols = pc_gamg->data_cols;
442   PetscErrorCode ierr;
443   PetscInt       Istart,Iend,nloc,jj,kk,my0,nLocalSelected,NN,MM,bs_in;
444   Mat            Prol, Gmat, AuxMat;
445   PetscMPIInt    mype, npe;
446   MPI_Comm       wcomm = ((PetscObject)Amat)->comm;
447   IS             permIS, llist_1, selected_1;
448   const PetscInt *selected_idx,col_bs=data_cols;
449 
450   PetscFunctionBegin;
451   ierr = MPI_Comm_rank( wcomm, &mype);  CHKERRQ(ierr);
452   ierr = MPI_Comm_size( wcomm, &npe);   CHKERRQ(ierr);
453   ierr = MatGetOwnershipRange( Amat, &Istart, &Iend ); CHKERRQ(ierr);
454   ierr = MatGetSize( Amat, &MM, &NN );  CHKERRQ(ierr);
455   ierr  = MatGetBlockSize( Amat, &bs_in ); CHKERRQ( ierr );
456   nloc = (Iend-Istart)/bs_in; my0 = Istart/bs_in; assert((Iend-Istart)%bs_in==0);
457 
458   ierr = createGraph( pc, Amat, &Gmat, &AuxMat, &permIS ); CHKERRQ(ierr);
459 
460   /* SELECT COARSE POINTS */
461 #if defined PETSC_USE_LOG
462   ierr = PetscLogEventBegin(gamg_setup_events[SET4],0,0,0,0);CHKERRQ(ierr);
463 #endif
464 
465   ierr = maxIndSetAgg( permIS, Gmat, AuxMat, PETSC_TRUE, &selected_1, &llist_1 );
466   CHKERRQ(ierr);
467   ierr = MatDestroy( &AuxMat );  CHKERRQ(ierr);
468 
469 #if defined PETSC_USE_LOG
470   ierr = PetscLogEventEnd(gamg_setup_events[SET4],0,0,0,0);CHKERRQ(ierr);
471 #endif
472   ierr = ISDestroy(&permIS); CHKERRQ(ierr);
473 
474   /* get 'nLocalSelected' */
475   ierr = ISGetLocalSize( selected_1, &NN );        CHKERRQ(ierr);
476   ierr = ISGetIndices( selected_1, &selected_idx );     CHKERRQ(ierr);
477   for(kk=0,nLocalSelected=0;kk<NN;kk++){
478     PetscInt lid = selected_idx[kk];
479     if(lid<nloc) nLocalSelected++;
480   }
481   ierr = ISRestoreIndices( selected_1, &selected_idx );     CHKERRQ(ierr);
482 
483   /* create prolongator, create P matrix */
484   ierr = MatCreateMPIAIJ(wcomm,
485                          nloc*bs_in, nLocalSelected*col_bs,
486                          PETSC_DETERMINE, PETSC_DETERMINE,
487                          data_cols, PETSC_NULL, data_cols, PETSC_NULL,
488                          &Prol );
489   CHKERRQ(ierr);
490 
491   /* can get all points "removed" */
492   ierr =  MatGetSize( Prol, &kk, &NN ); CHKERRQ(ierr);
493   if( NN==0 ) {
494     if( verbose ) {
495       PetscPrintf(PETSC_COMM_WORLD,"[%d]%s no selected points on coarse grid\n",mype,__FUNCT__);
496     }
497     ierr = MatDestroy( &Prol );  CHKERRQ(ierr);
498     ierr = ISDestroy( &llist_1 ); CHKERRQ(ierr);
499     ierr = ISDestroy( &selected_1 ); CHKERRQ(ierr);
500     ierr = MatDestroy( &Gmat );  CHKERRQ(ierr);
501     *a_P_out = PETSC_NULL;  /* out */
502     PetscFunctionReturn(0);
503   }
504 
505   { /* SA */
506     PetscReal *data_w_ghost;
507     PetscInt  myCrs0, nbnodes=0, *flid_fgid;
508 
509     /* create global vector of data in 'data_w_ghost' */
510 #if defined PETSC_USE_LOG
511     ierr = PetscLogEventBegin(gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr);
512 #endif
513     if (npe > 1) {
514       PetscReal *tmp_gdata,*tmp_ldata,*tp2;
515 
516       ierr = PetscMalloc( nloc*sizeof(PetscReal), &tmp_ldata ); CHKERRQ(ierr);
517       for( jj = 0 ; jj < data_cols ; jj++ ){
518         for( kk = 0 ; kk < bs_in ; kk++) {
519           PetscInt ii,nnodes;
520           const PetscReal *tp = data + jj*bs_in*nloc + kk;
521           for( ii = 0 ; ii < nloc ; ii++, tp += bs_in ){
522             tmp_ldata[ii] = *tp;
523           }
524           ierr = getDataWithGhosts( Gmat, 1, tmp_ldata, &nnodes, &tmp_gdata );
525           CHKERRQ(ierr);
526           if(jj==0 && kk==0) { /* now I know how many todal nodes - allocate */
527             ierr = PetscMalloc( nnodes*bs_in*data_cols*sizeof(PetscReal), &data_w_ghost ); CHKERRQ(ierr);
528             nbnodes = bs_in*nnodes;
529           }
530           tp2 = data_w_ghost + jj*bs_in*nnodes + kk;
531           for( ii = 0 ; ii < nnodes ; ii++, tp2 += bs_in ){
532             *tp2 = tmp_gdata[ii];
533           }
534           ierr = PetscFree( tmp_gdata ); CHKERRQ(ierr);
535         }
536       }
537       ierr = PetscFree( tmp_ldata ); CHKERRQ(ierr);
538     }
539     else {
540       nbnodes = bs_in*nloc;
541       data_w_ghost = (PetscReal*)data;
542     }
543 
544     /* scan my coarse zero gid */
545     MPI_Scan( &nLocalSelected, &myCrs0, 1, MPIU_INT, MPIU_SUM, wcomm );
546     myCrs0 -= nLocalSelected;
547 
548     /* get P0 */
549     if( npe > 1 ){
550       PetscReal *fid_glid_loc,*fiddata;
551       PetscInt nnodes;
552 
553       ierr = PetscMalloc( nloc*sizeof(PetscReal), &fid_glid_loc ); CHKERRQ(ierr);
554       for(kk=0;kk<nloc;kk++) fid_glid_loc[kk] = (PetscReal)(my0+kk);
555       ierr = getDataWithGhosts(Gmat, 1, fid_glid_loc, &nnodes, &fiddata);
556       CHKERRQ(ierr);
557       ierr = PetscMalloc( nnodes*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr);
558       for(kk=0;kk<nnodes;kk++) flid_fgid[kk] = (PetscInt)fiddata[kk];
559       ierr = PetscFree( fiddata ); CHKERRQ(ierr);
560       assert(nnodes==nbnodes/bs_in);
561       ierr = PetscFree( fid_glid_loc ); CHKERRQ(ierr);
562     }
563     else {
564       ierr = PetscMalloc( nloc*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr);
565       for(kk=0;kk<nloc;kk++) flid_fgid[kk] = my0 + kk;
566     }
567     ierr = MatDestroy( &Gmat );  CHKERRQ(ierr);
568 #if defined PETSC_USE_LOG
569     ierr = PetscLogEventEnd(gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr);
570 #endif
571 
572     ierr = formProl0(selected_1,llist_1,bs_in,data_cols,myCrs0,nbnodes,
573                      data_w_ghost,flid_fgid,a_data_out,Prol);
574     CHKERRQ(ierr);
575 
576     if (npe > 1) ierr = PetscFree( data_w_ghost );      CHKERRQ(ierr);
577     ierr = PetscFree( flid_fgid ); CHKERRQ(ierr);
578 
579     /* smooth P0 */
580     for( jj = 0 ; jj < pc_gamg_sa->smooths ; jj++ ){
581       Mat tMat;
582       Vec diag;
583       PetscReal alpha, emax, emin;
584 #if defined PETSC_USE_LOG
585       ierr = PetscLogEventBegin(gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr);
586 #endif
587       if( jj == 0 ) {
588         KSP eksp;
589         Vec bb, xx;
590         PC pc;
591         ierr = MatGetVecs( Amat, &bb, 0 );         CHKERRQ(ierr);
592         ierr = MatGetVecs( Amat, &xx, 0 );         CHKERRQ(ierr);
593         {
594           PetscRandom    rctx;
595           ierr = PetscRandomCreate(wcomm,&rctx);CHKERRQ(ierr);
596           ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
597           ierr = VecSetRandom(bb,rctx);CHKERRQ(ierr);
598           ierr = PetscRandomDestroy( &rctx ); CHKERRQ(ierr);
599         }
600         ierr = KSPCreate(wcomm,&eksp);                            CHKERRQ(ierr);
601         /* ierr = KSPSetType( eksp, KSPCG );                         CHKERRQ(ierr); */
602         ierr = KSPAppendOptionsPrefix( eksp, "est_");         CHKERRQ(ierr);
603         ierr = KSPSetFromOptions( eksp );    CHKERRQ(ierr);
604         ierr = KSPSetInitialGuessNonzero( eksp, PETSC_FALSE );    CHKERRQ(ierr);
605         ierr = KSPSetOperators( eksp, Amat, Amat, SAME_NONZERO_PATTERN );
606         CHKERRQ( ierr );
607         ierr = KSPGetPC( eksp, &pc );                              CHKERRQ( ierr );
608         ierr = PCSetType( pc, PCJACOBI ); CHKERRQ(ierr);  /* smoother */
609         ierr = KSPSetTolerances(eksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,10);
610         CHKERRQ(ierr);
611         ierr = KSPSetNormType( eksp, KSP_NORM_NONE );                 CHKERRQ(ierr);
612         ierr = KSPSetComputeSingularValues( eksp,PETSC_TRUE );        CHKERRQ(ierr);
613 
614         /* solve - keep stuff out of logging */
615         ierr = PetscLogEventDeactivate(KSP_Solve);CHKERRQ(ierr);
616         ierr = PetscLogEventDeactivate(PC_Apply);CHKERRQ(ierr);
617         ierr = KSPSolve( eksp, bb, xx );                              CHKERRQ(ierr);
618         ierr = PetscLogEventActivate(KSP_Solve);CHKERRQ(ierr);
619         ierr = PetscLogEventActivate(PC_Apply);CHKERRQ(ierr);
620 
621         ierr = KSPComputeExtremeSingularValues( eksp, &emax, &emin ); CHKERRQ(ierr);
622         if( verbose ) {
623           PetscPrintf(PETSC_COMM_WORLD,"\t\t\t%s smooth P0: max eigen=%e min=%e PC=%s\n",
624                       __FUNCT__,emax,emin,PCJACOBI);
625         }
626         ierr = VecDestroy( &xx );       CHKERRQ(ierr);
627         ierr = VecDestroy( &bb );       CHKERRQ(ierr);
628         ierr = KSPDestroy( &eksp );     CHKERRQ(ierr);
629 
630         if( pc_gamg->emax_id == -1 ) {
631           ierr = PetscObjectComposedDataRegister( &pc_gamg->emax_id );
632           assert(pc_gamg->emax_id != -1 );
633         }
634         ierr = PetscObjectComposedDataSetScalar( (PetscObject)Amat, pc_gamg->emax_id, emax ); CHKERRQ(ierr);
635       }
636 
637       /* smooth P1 := (I - omega/lam D^{-1}A)P0 */
638       ierr = MatMatMult( Amat, Prol, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tMat );   CHKERRQ(ierr);
639       ierr = MatGetVecs( Amat, &diag, 0 );    CHKERRQ(ierr);
640       ierr = MatGetDiagonal( Amat, diag );    CHKERRQ(ierr); /* effectively PCJACOBI */
641       ierr = VecReciprocal( diag );         CHKERRQ(ierr);
642       ierr = MatDiagonalScale( tMat, diag, 0 ); CHKERRQ(ierr);
643       ierr = VecDestroy( &diag );           CHKERRQ(ierr);
644       alpha = -1.5/emax;
645       ierr = MatAYPX( tMat, alpha, Prol, SUBSET_NONZERO_PATTERN );           CHKERRQ(ierr);
646       ierr = MatDestroy( &Prol );  CHKERRQ(ierr);
647       Prol = tMat;
648 #if defined PETSC_USE_LOG
649       ierr = PetscLogEventEnd(gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr);
650 #endif
651     }
652   } /* scoping - SA code */
653 
654   /* attach block size of columns */
655   if( pc_gamg->col_bs_id == -1 ) {
656     ierr = PetscObjectComposedDataRegister( &pc_gamg->col_bs_id );
657     assert(pc_gamg->col_bs_id != -1 );
658   }
659   ierr = PetscObjectComposedDataSetInt( (PetscObject)Prol, pc_gamg->col_bs_id, data_cols ); CHKERRQ(ierr);
660 
661   *a_P_out = Prol;  /* out */
662 
663   ierr = ISDestroy( &llist_1 ); CHKERRQ(ierr);
664   ierr = ISDestroy( &selected_1 ); CHKERRQ(ierr);
665 
666   PetscFunctionReturn(0);
667 }
668