1 /* 2 GAMG geometric-algebric multiogrid PC - Mark Adams 2011 3 */ 4 5 #include <../src/ksp/pc/impls/gamg/gamg.h> /*I "petscpc.h" I*/ 6 #include <private/kspimpl.h> 7 8 #include <assert.h> 9 #include <petscblaslapack.h> 10 11 typedef struct { 12 PetscInt smooths; 13 } PC_GAMG_AGG; 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "PCGAMGSetNSmooths" 17 /*@ 18 PCGAMGSetNSmooths - Set number of smoothing steps (1 is typical) 19 20 Not Collective on PC 21 22 Input Parameters: 23 . pc - the preconditioner context 24 25 Options Database Key: 26 . -pc_gamg_agg_nsmooths 27 28 Level: intermediate 29 30 Concepts: Aggregation AMG preconditioner 31 32 .seealso: () 33 @*/ 34 PetscErrorCode PCGAMGSetNSmooths(PC pc, PetscInt n) 35 { 36 PetscErrorCode ierr; 37 38 PetscFunctionBegin; 39 PetscValidHeaderSpecific(pc,PC_CLASSID,1); 40 ierr = PetscTryMethod(pc,"PCGAMGSetNSmooths_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr); 41 PetscFunctionReturn(0); 42 } 43 44 EXTERN_C_BEGIN 45 #undef __FUNCT__ 46 #define __FUNCT__ "PCGAMGSetNSmooths_GAMG" 47 PetscErrorCode PCGAMGSetNSmooths_GAMG(PC pc, PetscInt n) 48 { 49 PC_MG *mg = (PC_MG*)pc->data; 50 PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx; 51 PC_GAMG_AGG *pc_gamg_sa = (PC_GAMG_AGG*)pc_gamg->subctx; 52 53 PetscFunctionBegin; 54 assert(n>=0); 55 pc_gamg_sa->smooths = n; 56 PetscFunctionReturn(0); 57 } 58 EXTERN_C_END 59 60 /* -------------------------------------------------------------------------- */ 61 /* 62 PCSetFromOptions_GAMG_AGG 63 64 Input Parameter: 65 . pc - 66 */ 67 #undef __FUNCT__ 68 #define __FUNCT__ "PCSetFromOptions_GAMG_AGG" 69 PetscErrorCode PCSetFromOptions_GAMG_AGG( PC pc ) 70 { 71 PetscErrorCode ierr; 72 PC_MG *mg = (PC_MG*)pc->data; 73 PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx; 74 PC_GAMG_AGG *pc_gamg_sa = (PC_GAMG_AGG*)pc_gamg->subctx; 75 PetscBool flag; 76 77 PetscFunctionBegin; 78 /* call base class */ 79 ierr = PCSetFromOptions_GAMG( pc ); CHKERRQ(ierr); 80 81 ierr = PetscOptionsHead("GAMG-AGG options"); CHKERRQ(ierr); 82 { 83 /* -pc_gamg_agg_nsmooths */ 84 pc_gamg_sa->smooths = 0; 85 ierr = PetscOptionsInt("-pc_gamg_agg_nsmooths", 86 "smoothing steps for smoothed aggregation, usually 1 (0)", 87 "PCGAMGSetNSmooths", 88 pc_gamg_sa->smooths, 89 &pc_gamg_sa->smooths, 90 &flag); 91 CHKERRQ(ierr); 92 } 93 ierr = PetscOptionsTail();CHKERRQ(ierr); 94 95 if( pc_gamg->verbose ) { 96 PetscPrintf(PETSC_COMM_WORLD,"[%d]%s done\n",0,__FUNCT__); 97 } 98 99 PetscFunctionReturn(0); 100 } 101 102 /* -------------------------------------------------------------------------- */ 103 /* 104 PCDestroy_AGG 105 106 Input Parameter: 107 . pc - 108 */ 109 #undef __FUNCT__ 110 #define __FUNCT__ "PCDestroy_AGG" 111 PetscErrorCode PCDestroy_AGG( PC pc ) 112 { 113 PetscErrorCode ierr; 114 PC_MG *mg = (PC_MG*)pc->data; 115 PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx; 116 PC_GAMG_AGG *pc_gamg_sa = (PC_GAMG_AGG*)pc_gamg->subctx; 117 118 PetscFunctionBegin; 119 if( pc_gamg_sa ) { 120 ierr = PetscFree(pc_gamg_sa);CHKERRQ(ierr); 121 pc_gamg_sa = 0; 122 } 123 124 /* call base class */ 125 ierr = PCDestroy_GAMG( pc );CHKERRQ(ierr); 126 127 PetscFunctionReturn(0); 128 } 129 130 /* -------------------------------------------------------------------------- */ 131 /* 132 PCSetCoordinates_AGG 133 134 Input Parameter: 135 . pc - the preconditioner context 136 */ 137 EXTERN_C_BEGIN 138 #undef __FUNCT__ 139 #define __FUNCT__ "PCSetCoordinates_AGG" 140 PetscErrorCode PCSetCoordinates_AGG( PC pc, PetscInt ndm, PetscReal *coords ) 141 { 142 PC_MG *mg = (PC_MG*)pc->data; 143 PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx; 144 PetscErrorCode ierr; 145 PetscInt arrsz,bs,my0,kk,ii,jj,nloc,Iend; 146 Mat Amat = pc->pmat; 147 148 PetscFunctionBegin; 149 PetscValidHeaderSpecific( Amat, MAT_CLASSID, 1 ); 150 ierr = MatGetBlockSize( Amat, &bs ); CHKERRQ( ierr ); 151 ierr = MatGetOwnershipRange( Amat, &my0, &Iend ); CHKERRQ(ierr); 152 nloc = (Iend-my0)/bs; 153 if((Iend-my0)%bs!=0) SETERRQ1(((PetscObject)Amat)->comm,PETSC_ERR_ARG_WRONG, "Bad local size %d.",nloc); 154 155 /* SA: null space vectors */ 156 if( coords && bs==1 ) pc_gamg->data_cols = 1; /* scalar w/ coords and SA (not needed) */ 157 else if( coords ) pc_gamg->data_cols = (ndm==2 ? 3 : 6); /* elasticity */ 158 else pc_gamg->data_cols = bs; /* no data, force SA with constant null space vectors */ 159 pc_gamg->data_rows = bs; 160 161 arrsz = nloc*pc_gamg->data_rows*pc_gamg->data_cols; 162 163 /* create data - syntactic sugar that should be refactored at some point */ 164 if (pc_gamg->data==0 || (pc_gamg->data_sz != arrsz)) { 165 ierr = PetscFree( pc_gamg->data ); CHKERRQ(ierr); 166 ierr = PetscMalloc((arrsz+1)*sizeof(PetscReal), &pc_gamg->data ); CHKERRQ(ierr); 167 } 168 for(kk=0;kk<arrsz;kk++)pc_gamg->data[kk] = -999999999.; 169 pc_gamg->data[arrsz] = -99.; 170 /* copy data in - column oriented */ 171 for(kk=0;kk<nloc;kk++){ 172 const PetscInt M = Iend - my0; 173 PetscReal *data = &pc_gamg->data[kk*bs]; 174 if( pc_gamg->data_cols==1 ) *data = 1.0; 175 else { 176 for(ii=0;ii<bs;ii++) 177 for(jj=0;jj<bs;jj++) 178 if(ii==jj)data[ii*M + jj] = 1.0; /* translational modes */ 179 else data[ii*M + jj] = 0.0; 180 if( coords ) { 181 if( ndm == 2 ){ /* rotational modes */ 182 data += 2*M; 183 data[0] = -coords[2*kk+1]; 184 data[1] = coords[2*kk]; 185 } 186 else { 187 data += 3*M; 188 data[0] = 0.0; data[M+0] = coords[3*kk+2]; data[2*M+0] = -coords[3*kk+1]; 189 data[1] = -coords[3*kk+2]; data[M+1] = 0.0; data[2*M+1] = coords[3*kk]; 190 data[2] = coords[3*kk+1]; data[M+2] = -coords[3*kk]; data[2*M+2] = 0.0; 191 } 192 } 193 } 194 } 195 assert(pc_gamg->data[arrsz] == -99.); 196 197 pc_gamg->data_sz = arrsz; 198 199 PetscFunctionReturn(0); 200 } 201 EXTERN_C_END 202 203 204 /* -------------------------------------------------------------------------- */ 205 /* 206 PCSetData_AGG 207 208 Input Parameter: 209 . pc - 210 */ 211 #undef __FUNCT__ 212 #define __FUNCT__ "PCSetData_AGG" 213 PetscErrorCode PCSetData_AGG( PC pc ) 214 { 215 PetscErrorCode ierr; 216 PetscFunctionBegin; 217 ierr = PCSetCoordinates_AGG( pc, -1, PETSC_NULL ); CHKERRQ(ierr); 218 PetscFunctionReturn(0); 219 } 220 221 /* -------------------------------------------------------------------------- */ 222 /* 223 PCCreateGAMG_AGG 224 225 Input Parameter: 226 . pc - 227 */ 228 #undef __FUNCT__ 229 #define __FUNCT__ "PCCreateGAMG_AGG" 230 PetscErrorCode PCCreateGAMG_AGG( PC pc ) 231 { 232 PetscErrorCode ierr; 233 PC_MG *mg = (PC_MG*)pc->data; 234 PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx; 235 PC_GAMG_AGG *pc_gamg_sa; 236 237 PetscFunctionBegin; 238 /* create sub context for SA */ 239 ierr = PetscNewLog( pc, PC_GAMG_AGG, &pc_gamg_sa ); CHKERRQ(ierr); 240 assert(!pc_gamg->subctx); 241 pc_gamg->subctx = pc_gamg_sa; 242 243 pc->ops->setfromoptions = PCSetFromOptions_GAMG_AGG; 244 pc->ops->destroy = PCDestroy_AGG; 245 /* reset does not do anything; setup not virtual */ 246 247 /* set internal function pointers */ 248 pc_gamg->createprolongator = PCGAMGcreateProl_AGG; 249 pc_gamg->createdefaultdata = PCSetData_AGG; 250 251 ierr = PetscObjectComposeFunctionDynamic( (PetscObject)pc, 252 "PCSetCoordinates_C", 253 "PCSetCoordinates_AGG", 254 PCSetCoordinates_AGG); 255 PetscFunctionReturn(0); 256 } 257 258 /* -------------------------------------------------------------------------- */ 259 /* 260 formProl0 261 262 Input Parameter: 263 . selected - list of selected local ID, includes selected ghosts 264 . locals_llist - linked list with aggregates 265 . bs - block size 266 . nSAvec - num columns of new P 267 . my0crs - global index of locals 268 . data_stride - bs*(nloc nodes + ghost nodes) 269 . data_in[data_stride*nSAvec] - local data on fine grid 270 . flid_fgid[data_stride/bs] - make local to global IDs, includes ghosts in 'locals_llist' 271 Output Parameter: 272 . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data 273 . a_Prol - prolongation operator 274 */ 275 #undef __FUNCT__ 276 #define __FUNCT__ "formProl0" 277 PetscErrorCode formProl0(IS selected, /* list of selected local ID, includes selected ghosts */ 278 IS locals_llist, /* linked list from selected vertices of aggregate unselected vertices */ 279 const PetscInt bs, 280 const PetscInt nSAvec, 281 const PetscInt my0crs, 282 const PetscInt data_stride, 283 PetscReal data_in[], 284 const PetscInt flid_fgid[], 285 PetscReal **a_data_out, 286 Mat a_Prol /* prolongation operator (output)*/ 287 ) 288 { 289 PetscErrorCode ierr; 290 PetscInt Istart,Iend,nFineLoc,clid,flid,aggID,kk,jj,ii,nLocalSelected,ndone,nSelected; 291 MPI_Comm wcomm = ((PetscObject)a_Prol)->comm; 292 PetscMPIInt mype, npe; 293 const PetscInt *selected_idx,*llist_idx; 294 PetscReal *out_data; 295 296 PetscFunctionBegin; 297 ierr = MPI_Comm_rank(wcomm,&mype);CHKERRQ(ierr); 298 ierr = MPI_Comm_size(wcomm,&npe);CHKERRQ(ierr); 299 ierr = MatGetOwnershipRange( a_Prol, &Istart, &Iend ); CHKERRQ(ierr); 300 nFineLoc = (Iend-Istart)/bs; assert((Iend-Istart)%bs==0); 301 302 ierr = ISGetLocalSize( selected, &nSelected ); CHKERRQ(ierr); 303 ierr = ISGetIndices( selected, &selected_idx ); CHKERRQ(ierr); 304 for(kk=0,nLocalSelected=0;kk<nSelected;kk++){ 305 PetscInt lid = selected_idx[kk]; 306 if(lid<nFineLoc) nLocalSelected++; 307 } 308 309 /* aloc space for coarse point data (output) */ 310 #define DATA_OUT_STRIDE (nLocalSelected*nSAvec) 311 ierr = PetscMalloc( (DATA_OUT_STRIDE*nSAvec+1)*sizeof(PetscReal), &out_data ); CHKERRQ(ierr); 312 for(ii=0;ii<DATA_OUT_STRIDE*nSAvec+1;ii++) out_data[ii]=1.e300; 313 *a_data_out = out_data; /* output - stride nLocalSelected*nSAvec */ 314 315 /* find points and set prolongation */ 316 ndone = 0; 317 ierr = ISGetIndices( locals_llist, &llist_idx ); CHKERRQ(ierr); 318 for( clid = 0 ; clid < nLocalSelected ; clid++ ){ 319 PetscInt cgid = my0crs + clid, cids[100]; 320 321 /* count agg */ 322 aggID = 0; 323 flid = selected_idx[clid]; assert(flid != -1); 324 do{ 325 aggID++; 326 } while( (flid=llist_idx[flid]) != -1 ); 327 328 /* get block */ 329 { 330 PetscBLASInt asz=aggID,M=asz*bs,N=nSAvec,INFO; 331 PetscBLASInt Mdata=M+((N-M>0)?N-M:0),LDA=Mdata,LWORK=N*bs; 332 PetscScalar *qqc,*qqr,*TAU,*WORK; 333 PetscInt *fids; 334 335 ierr = PetscMalloc( (Mdata*N)*sizeof(PetscScalar), &qqc ); CHKERRQ(ierr); 336 ierr = PetscMalloc( (M*N)*sizeof(PetscScalar), &qqr ); CHKERRQ(ierr); 337 ierr = PetscMalloc( N*sizeof(PetscScalar), &TAU ); CHKERRQ(ierr); 338 ierr = PetscMalloc( LWORK*sizeof(PetscScalar), &WORK ); CHKERRQ(ierr); 339 ierr = PetscMalloc( M*sizeof(PetscInt), &fids ); CHKERRQ(ierr); 340 341 flid = selected_idx[clid]; 342 aggID = 0; 343 do{ 344 /* copy in B_i matrix - column oriented */ 345 PetscReal *data = &data_in[flid*bs]; 346 for( kk = ii = 0; ii < bs ; ii++ ) { 347 for( jj = 0; jj < N ; jj++ ) { 348 qqc[jj*Mdata + aggID*bs + ii] = data[jj*data_stride + ii]; 349 } 350 } 351 352 /* set fine IDs */ 353 for(kk=0;kk<bs;kk++) fids[aggID*bs + kk] = flid_fgid[flid]*bs + kk; 354 355 aggID++; 356 }while( (flid=llist_idx[flid]) != -1 ); 357 358 /* pad with zeros */ 359 for( ii = asz*bs; ii < Mdata ; ii++ ) { 360 for( jj = 0; jj < N ; jj++, kk++ ) { 361 qqc[jj*Mdata + ii] = .0; 362 } 363 } 364 365 ndone += aggID; 366 /* QR */ 367 LAPACKgeqrf_( &Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO ); 368 if( INFO != 0 ) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRS error"); 369 /* get R - column oriented - output B_{i+1} */ 370 { 371 PetscReal *data = &out_data[clid*nSAvec]; 372 for( jj = 0; jj < nSAvec ; jj++ ) { 373 for( ii = 0; ii < nSAvec ; ii++ ) { 374 assert(data[jj*DATA_OUT_STRIDE + ii] == 1.e300); 375 if( ii <= jj ) data[jj*DATA_OUT_STRIDE + ii] = PetscRealPart(qqc[jj*Mdata + ii]); 376 else data[jj*DATA_OUT_STRIDE + ii] = 0.; 377 } 378 } 379 } 380 381 /* get Q - row oriented */ 382 LAPACKungqr_( &Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO ); 383 if( INFO != 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR error arg %d",-INFO); 384 385 for( ii = 0 ; ii < M ; ii++ ){ 386 for( jj = 0 ; jj < N ; jj++ ) { 387 qqr[N*ii + jj] = qqc[jj*Mdata + ii]; 388 } 389 } 390 391 /* add diagonal block of P0 */ 392 for(kk=0;kk<N;kk++) cids[kk] = N*cgid + kk; /* global col IDs in P0 */ 393 ierr = MatSetValues(a_Prol,M,fids,N,cids,qqr,INSERT_VALUES); CHKERRQ(ierr); 394 395 ierr = PetscFree( qqc ); CHKERRQ(ierr); 396 ierr = PetscFree( qqr ); CHKERRQ(ierr); 397 ierr = PetscFree( TAU ); CHKERRQ(ierr); 398 ierr = PetscFree( WORK ); CHKERRQ(ierr); 399 ierr = PetscFree( fids ); CHKERRQ(ierr); 400 } /* scoping */ 401 } /* for all coarse nodes */ 402 assert(out_data[nSAvec*DATA_OUT_STRIDE]==1.e300); 403 404 /* ierr = MPI_Allreduce( &ndone, &ii, 1, MPIU_INT, MPIU_SUM, wcomm ); /\* synchronous version *\/ */ 405 /* MatGetSize( a_Prol, &kk, &jj ); */ 406 /* PetscPrintf(PETSC_COMM_WORLD," **** [%d]%s %d total done, N=%d (%d local done)\n",mype,__FUNCT__,ii,kk/bs,ndone); */ 407 408 ierr = ISRestoreIndices( selected, &selected_idx ); CHKERRQ(ierr); 409 ierr = ISRestoreIndices( locals_llist, &llist_idx ); CHKERRQ(ierr); 410 ierr = MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 411 ierr = MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 412 413 PetscFunctionReturn(0); 414 } 415 416 /* -------------------------------------------------------------------------- */ 417 /* 418 PCGAMGcreateProl_AGG 419 420 Input Parameter: 421 . pc - this 422 . Amat - matrix on this fine level 423 . data[nloc*data_sz(in)] 424 Output Parameter: 425 . a_P_out - prolongation operator to the next level 426 . a_data_out - data of coarse grid points (num local columns in 'a_P_out') 427 */ 428 #undef __FUNCT__ 429 #define __FUNCT__ "PCGAMGcreateProl_AGG" 430 PetscErrorCode PCGAMGcreateProl_AGG( PC pc, 431 const Mat Amat, 432 const PetscReal data[], 433 Mat *a_P_out, 434 PetscReal **a_data_out 435 ) 436 { 437 PC_MG *mg = (PC_MG*)pc->data; 438 PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx; 439 PC_GAMG_AGG *pc_gamg_sa = (PC_GAMG_AGG*)pc_gamg->subctx; 440 const PetscInt verbose = pc_gamg->verbose; 441 const PetscInt data_cols = pc_gamg->data_cols; 442 PetscErrorCode ierr; 443 PetscInt Istart,Iend,nloc,jj,kk,my0,nLocalSelected,NN,MM,bs_in; 444 Mat Prol, Gmat, AuxMat; 445 PetscMPIInt mype, npe; 446 MPI_Comm wcomm = ((PetscObject)Amat)->comm; 447 IS permIS, llist_1, selected_1; 448 const PetscInt *selected_idx,col_bs=data_cols; 449 450 PetscFunctionBegin; 451 ierr = MPI_Comm_rank( wcomm, &mype); CHKERRQ(ierr); 452 ierr = MPI_Comm_size( wcomm, &npe); CHKERRQ(ierr); 453 ierr = MatGetOwnershipRange( Amat, &Istart, &Iend ); CHKERRQ(ierr); 454 ierr = MatGetSize( Amat, &MM, &NN ); CHKERRQ(ierr); 455 ierr = MatGetBlockSize( Amat, &bs_in ); CHKERRQ( ierr ); 456 nloc = (Iend-Istart)/bs_in; my0 = Istart/bs_in; assert((Iend-Istart)%bs_in==0); 457 458 ierr = createGraph( pc, Amat, &Gmat, &AuxMat, &permIS ); CHKERRQ(ierr); 459 460 /* SELECT COARSE POINTS */ 461 #if defined PETSC_USE_LOG 462 ierr = PetscLogEventBegin(gamg_setup_events[SET4],0,0,0,0);CHKERRQ(ierr); 463 #endif 464 465 ierr = maxIndSetAgg( permIS, Gmat, AuxMat, PETSC_TRUE, &selected_1, &llist_1 ); 466 CHKERRQ(ierr); 467 ierr = MatDestroy( &AuxMat ); CHKERRQ(ierr); 468 469 #if defined PETSC_USE_LOG 470 ierr = PetscLogEventEnd(gamg_setup_events[SET4],0,0,0,0);CHKERRQ(ierr); 471 #endif 472 ierr = ISDestroy(&permIS); CHKERRQ(ierr); 473 474 /* get 'nLocalSelected' */ 475 ierr = ISGetLocalSize( selected_1, &NN ); CHKERRQ(ierr); 476 ierr = ISGetIndices( selected_1, &selected_idx ); CHKERRQ(ierr); 477 for(kk=0,nLocalSelected=0;kk<NN;kk++){ 478 PetscInt lid = selected_idx[kk]; 479 if(lid<nloc) nLocalSelected++; 480 } 481 ierr = ISRestoreIndices( selected_1, &selected_idx ); CHKERRQ(ierr); 482 483 /* create prolongator, create P matrix */ 484 ierr = MatCreateMPIAIJ(wcomm, 485 nloc*bs_in, nLocalSelected*col_bs, 486 PETSC_DETERMINE, PETSC_DETERMINE, 487 data_cols, PETSC_NULL, data_cols, PETSC_NULL, 488 &Prol ); 489 CHKERRQ(ierr); 490 491 /* can get all points "removed" */ 492 ierr = MatGetSize( Prol, &kk, &NN ); CHKERRQ(ierr); 493 if( NN==0 ) { 494 if( verbose ) { 495 PetscPrintf(PETSC_COMM_WORLD,"[%d]%s no selected points on coarse grid\n",mype,__FUNCT__); 496 } 497 ierr = MatDestroy( &Prol ); CHKERRQ(ierr); 498 ierr = ISDestroy( &llist_1 ); CHKERRQ(ierr); 499 ierr = ISDestroy( &selected_1 ); CHKERRQ(ierr); 500 ierr = MatDestroy( &Gmat ); CHKERRQ(ierr); 501 *a_P_out = PETSC_NULL; /* out */ 502 PetscFunctionReturn(0); 503 } 504 505 { /* SA */ 506 PetscReal *data_w_ghost; 507 PetscInt myCrs0, nbnodes=0, *flid_fgid; 508 509 /* create global vector of data in 'data_w_ghost' */ 510 #if defined PETSC_USE_LOG 511 ierr = PetscLogEventBegin(gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr); 512 #endif 513 if (npe > 1) { 514 PetscReal *tmp_gdata,*tmp_ldata,*tp2; 515 516 ierr = PetscMalloc( nloc*sizeof(PetscReal), &tmp_ldata ); CHKERRQ(ierr); 517 for( jj = 0 ; jj < data_cols ; jj++ ){ 518 for( kk = 0 ; kk < bs_in ; kk++) { 519 PetscInt ii,nnodes; 520 const PetscReal *tp = data + jj*bs_in*nloc + kk; 521 for( ii = 0 ; ii < nloc ; ii++, tp += bs_in ){ 522 tmp_ldata[ii] = *tp; 523 } 524 ierr = getDataWithGhosts( Gmat, 1, tmp_ldata, &nnodes, &tmp_gdata ); 525 CHKERRQ(ierr); 526 if(jj==0 && kk==0) { /* now I know how many todal nodes - allocate */ 527 ierr = PetscMalloc( nnodes*bs_in*data_cols*sizeof(PetscReal), &data_w_ghost ); CHKERRQ(ierr); 528 nbnodes = bs_in*nnodes; 529 } 530 tp2 = data_w_ghost + jj*bs_in*nnodes + kk; 531 for( ii = 0 ; ii < nnodes ; ii++, tp2 += bs_in ){ 532 *tp2 = tmp_gdata[ii]; 533 } 534 ierr = PetscFree( tmp_gdata ); CHKERRQ(ierr); 535 } 536 } 537 ierr = PetscFree( tmp_ldata ); CHKERRQ(ierr); 538 } 539 else { 540 nbnodes = bs_in*nloc; 541 data_w_ghost = (PetscReal*)data; 542 } 543 544 /* scan my coarse zero gid */ 545 MPI_Scan( &nLocalSelected, &myCrs0, 1, MPIU_INT, MPIU_SUM, wcomm ); 546 myCrs0 -= nLocalSelected; 547 548 /* get P0 */ 549 if( npe > 1 ){ 550 PetscReal *fid_glid_loc,*fiddata; 551 PetscInt nnodes; 552 553 ierr = PetscMalloc( nloc*sizeof(PetscReal), &fid_glid_loc ); CHKERRQ(ierr); 554 for(kk=0;kk<nloc;kk++) fid_glid_loc[kk] = (PetscReal)(my0+kk); 555 ierr = getDataWithGhosts(Gmat, 1, fid_glid_loc, &nnodes, &fiddata); 556 CHKERRQ(ierr); 557 ierr = PetscMalloc( nnodes*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr); 558 for(kk=0;kk<nnodes;kk++) flid_fgid[kk] = (PetscInt)fiddata[kk]; 559 ierr = PetscFree( fiddata ); CHKERRQ(ierr); 560 assert(nnodes==nbnodes/bs_in); 561 ierr = PetscFree( fid_glid_loc ); CHKERRQ(ierr); 562 } 563 else { 564 ierr = PetscMalloc( nloc*sizeof(PetscInt), &flid_fgid ); CHKERRQ(ierr); 565 for(kk=0;kk<nloc;kk++) flid_fgid[kk] = my0 + kk; 566 } 567 ierr = MatDestroy( &Gmat ); CHKERRQ(ierr); 568 #if defined PETSC_USE_LOG 569 ierr = PetscLogEventEnd(gamg_setup_events[SET7],0,0,0,0);CHKERRQ(ierr); 570 #endif 571 572 ierr = formProl0(selected_1,llist_1,bs_in,data_cols,myCrs0,nbnodes, 573 data_w_ghost,flid_fgid,a_data_out,Prol); 574 CHKERRQ(ierr); 575 576 if (npe > 1) ierr = PetscFree( data_w_ghost ); CHKERRQ(ierr); 577 ierr = PetscFree( flid_fgid ); CHKERRQ(ierr); 578 579 /* smooth P0 */ 580 for( jj = 0 ; jj < pc_gamg_sa->smooths ; jj++ ){ 581 Mat tMat; 582 Vec diag; 583 PetscReal alpha, emax, emin; 584 #if defined PETSC_USE_LOG 585 ierr = PetscLogEventBegin(gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr); 586 #endif 587 if( jj == 0 ) { 588 KSP eksp; 589 Vec bb, xx; 590 PC pc; 591 ierr = MatGetVecs( Amat, &bb, 0 ); CHKERRQ(ierr); 592 ierr = MatGetVecs( Amat, &xx, 0 ); CHKERRQ(ierr); 593 { 594 PetscRandom rctx; 595 ierr = PetscRandomCreate(wcomm,&rctx);CHKERRQ(ierr); 596 ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr); 597 ierr = VecSetRandom(bb,rctx);CHKERRQ(ierr); 598 ierr = PetscRandomDestroy( &rctx ); CHKERRQ(ierr); 599 } 600 ierr = KSPCreate(wcomm,&eksp); CHKERRQ(ierr); 601 /* ierr = KSPSetType( eksp, KSPCG ); CHKERRQ(ierr); */ 602 ierr = KSPAppendOptionsPrefix( eksp, "est_"); CHKERRQ(ierr); 603 ierr = KSPSetFromOptions( eksp ); CHKERRQ(ierr); 604 ierr = KSPSetInitialGuessNonzero( eksp, PETSC_FALSE ); CHKERRQ(ierr); 605 ierr = KSPSetOperators( eksp, Amat, Amat, SAME_NONZERO_PATTERN ); 606 CHKERRQ( ierr ); 607 ierr = KSPGetPC( eksp, &pc ); CHKERRQ( ierr ); 608 ierr = PCSetType( pc, PCJACOBI ); CHKERRQ(ierr); /* smoother */ 609 ierr = KSPSetTolerances(eksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,10); 610 CHKERRQ(ierr); 611 ierr = KSPSetNormType( eksp, KSP_NORM_NONE ); CHKERRQ(ierr); 612 ierr = KSPSetComputeSingularValues( eksp,PETSC_TRUE ); CHKERRQ(ierr); 613 614 /* solve - keep stuff out of logging */ 615 ierr = PetscLogEventDeactivate(KSP_Solve);CHKERRQ(ierr); 616 ierr = PetscLogEventDeactivate(PC_Apply);CHKERRQ(ierr); 617 ierr = KSPSolve( eksp, bb, xx ); CHKERRQ(ierr); 618 ierr = PetscLogEventActivate(KSP_Solve);CHKERRQ(ierr); 619 ierr = PetscLogEventActivate(PC_Apply);CHKERRQ(ierr); 620 621 ierr = KSPComputeExtremeSingularValues( eksp, &emax, &emin ); CHKERRQ(ierr); 622 if( verbose ) { 623 PetscPrintf(PETSC_COMM_WORLD,"\t\t\t%s smooth P0: max eigen=%e min=%e PC=%s\n", 624 __FUNCT__,emax,emin,PCJACOBI); 625 } 626 ierr = VecDestroy( &xx ); CHKERRQ(ierr); 627 ierr = VecDestroy( &bb ); CHKERRQ(ierr); 628 ierr = KSPDestroy( &eksp ); CHKERRQ(ierr); 629 630 if( pc_gamg->emax_id == -1 ) { 631 ierr = PetscObjectComposedDataRegister( &pc_gamg->emax_id ); 632 assert(pc_gamg->emax_id != -1 ); 633 } 634 ierr = PetscObjectComposedDataSetScalar( (PetscObject)Amat, pc_gamg->emax_id, emax ); CHKERRQ(ierr); 635 } 636 637 /* smooth P1 := (I - omega/lam D^{-1}A)P0 */ 638 ierr = MatMatMult( Amat, Prol, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &tMat ); CHKERRQ(ierr); 639 ierr = MatGetVecs( Amat, &diag, 0 ); CHKERRQ(ierr); 640 ierr = MatGetDiagonal( Amat, diag ); CHKERRQ(ierr); /* effectively PCJACOBI */ 641 ierr = VecReciprocal( diag ); CHKERRQ(ierr); 642 ierr = MatDiagonalScale( tMat, diag, 0 ); CHKERRQ(ierr); 643 ierr = VecDestroy( &diag ); CHKERRQ(ierr); 644 alpha = -1.5/emax; 645 ierr = MatAYPX( tMat, alpha, Prol, SUBSET_NONZERO_PATTERN ); CHKERRQ(ierr); 646 ierr = MatDestroy( &Prol ); CHKERRQ(ierr); 647 Prol = tMat; 648 #if defined PETSC_USE_LOG 649 ierr = PetscLogEventEnd(gamg_setup_events[SET9],0,0,0,0);CHKERRQ(ierr); 650 #endif 651 } 652 } /* scoping - SA code */ 653 654 /* attach block size of columns */ 655 if( pc_gamg->col_bs_id == -1 ) { 656 ierr = PetscObjectComposedDataRegister( &pc_gamg->col_bs_id ); 657 assert(pc_gamg->col_bs_id != -1 ); 658 } 659 ierr = PetscObjectComposedDataSetInt( (PetscObject)Prol, pc_gamg->col_bs_id, data_cols ); CHKERRQ(ierr); 660 661 *a_P_out = Prol; /* out */ 662 663 ierr = ISDestroy( &llist_1 ); CHKERRQ(ierr); 664 ierr = ISDestroy( &selected_1 ); CHKERRQ(ierr); 665 666 PetscFunctionReturn(0); 667 } 668