1 #ifdef HAVE_PETSC 2 #include <stdio.h> 3 4 #include "petscsys.h" 5 #include "petscvec.h" 6 #include "petscmat.h" 7 #include "petscpc.h" 8 #include "petscksp.h" 9 10 #include "assert.h" 11 #include "common_c.h" 12 13 #include <FCMangle.h> 14 #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP) 15 #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR) 16 #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC) 17 #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR) 18 #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT) 19 #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR) 20 #define min(a,b) (((a) < (b)) ? (a) : (b)) 21 #define max(a,b) (((a) > (b)) ? (a) : (b)) 22 #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME) 23 #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF) 24 25 typedef long long int gcorp_t; 26 27 void get_time(uint64_t* rv, uint64_t* cycle); 28 void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl); 29 30 //#include "auxmpi.h" 31 // static PetscOffset poff; 32 33 static Mat lhsP; 34 static PC pc; 35 static KSP ksp; 36 static Vec DyP, resP, DyPLocal; 37 static PetscErrorCode ierr; 38 static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol; 39 static IS LocalIndexSet, GlobalIndexSet; 40 static ISLocalToGlobalMapping VectorMapping; 41 static ISLocalToGlobalMapping GblVectorMapping; 42 static VecScatter scatter7; 43 static int firstpetsccall = 1; 44 45 static Mat lhsPs; 46 static PC pcs; 47 static KSP ksps; 48 static Vec DyPs, resPs, DyPLocals; 49 static IS LocalIndexSets, GlobalIndexSets; 50 static ISLocalToGlobalMapping VectorMappings; 51 static ISLocalToGlobalMapping GblVectorMappings; 52 static VecScatter scatter7s; 53 static int firstpetsccalls = 1; 54 55 static int rankdump=-1; // 8121 was the problem rank with 3.5.3 56 57 58 void SolGMRp(double* y, double* ac, double* yold, 59 double* x, int* iBC, double* BC, 60 int* col, int* row, double* lhsk, 61 double* res, double* BDiag, 62 int* iper, 63 int* ilwork, double* shp, double* shgl, double* shpb, 64 double* shglb, double* Dy, double* rerr, long long int* fncorp) 65 { 66 // 67 // ---------------------------------------------------------------------- 68 // 69 // This is the preconditioned GMRES driver routine. 70 // 71 // input: 72 // y (nshg,ndof) : Y-variables at n+alpha_v 73 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 74 // yold (nshg,ndof) : Y-variables at beginning of step 75 // acold (nshg,ndof) : Primvar. accel. variable at begng step 76 // x (numnp,nsd) : node coordinates 77 // iBC (nshg) : BC codes 78 // BC (nshg,ndofBC) : BC constraint parameters 79 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 80 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 81 // 82 // output: 83 // res (nshg,nflow) : preconditioned residual 84 // BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 85 // 86 // 87 // Zdenek Johan, Winter 1991. (Fortran 90) 88 // ---------------------------------------------------------------------- 89 // 90 91 92 // Get variables from common_c.h 93 int nshg, nflow, nsd, iownnodes; 94 nshg = conpar.nshg; 95 nflow = conpar.nflow; 96 nsd = NSD; 97 iownnodes = conpar.iownnodes; 98 99 gcorp_t nshgt; 100 gcorp_t mbeg; 101 gcorp_t mend; 102 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 103 mbeg = (gcorp_t) newdim.minowned; 104 mend = (gcorp_t) newdim.maxowned; 105 106 107 int node, element, var, eqn; 108 double valtoinsert; 109 int nenl, iel, lelCat, lcsyst, iorder, nshl; 110 int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 111 double DyFlat[nshg*nflow]; 112 double DyFlatPhasta[nshg*nflow]; 113 double rmes[nshg*nflow]; 114 // DEBUG 115 int i,j,k,l,m; 116 117 // FIXME: PetscScalar 118 double real_rtol, real_abstol, real_dtol; 119 // /DEBUG 120 //double parray[1]; // Should be a PetscScalar 121 double *parray; // Should be a PetscScalar 122 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 123 PetscInt petsc_n; 124 PetscOne = 1; 125 uint64_t duration[4]; 126 127 // printf("%g %g %g \n", *rerr, *(rerr+1) , *(rerr+2)); 128 129 if(firstpetsccall == 1) { 130 /* call get_time(duration(1),duration(2)); 131 call system('sleep 10'); 132 call get_time(duration(3),duration(4)); 133 call get_max_time_diff(duration(1), duration(3), 134 duration(2), duration(4), 135 "TenSecondSleep " // char(0)) 136 if(myrank .eq. 0) { 137 !write(*,"(A, E10.3)") "TenSecondSleep ", elapsed 138 } 139 */ 140 } 141 // 142 // 143 // .... *******************>> Element Data Formation <<****************** 144 // 145 // 146 // .... set the parameters for flux and surface tension calculations 147 // 148 // 149 genpar.idflx = 0 ; 150 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 151 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 152 153 154 155 gcorp_t glbNZ; 156 157 if(firstpetsccall == 1) { 158 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 159 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 160 for(i=0;i<iownnodes;i++) { 161 idiagnz[i]=0; 162 iodiagnz[i]=0; 163 } 164 i=0; 165 for(k=0;k<nshg;k++) { 166 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 167 // this node is not owned by this rank so we skip 168 } else { 169 for(j=col[i]-1;j<col[i+1]-1;j++) { 170 // assert(row[j]<=nshg); 171 // assert(fncorp[row[j]-1]<=nshgt); 172 glbNZ=fncorp[row[j]-1]; 173 if((glbNZ < mbeg) || (glbNZ > mend)) { 174 iodiagnz[i]++; 175 } else { 176 idiagnz[i]++; 177 } 178 } 179 i++; 180 } 181 } 182 gcorp_t mind=1000; 183 gcorp_t mino=1000; 184 gcorp_t maxd=0; 185 gcorp_t maxo=0; 186 for(i=0;i<iownnodes;i++) { 187 mind=min(mind,idiagnz[i]); 188 mino=min(mino,iodiagnz[i]); 189 maxd=max(maxd,idiagnz[i]); 190 maxo=max(maxo,iodiagnz[i]); 191 // iodiagnz[i]=max(iodiagnz[i],10); 192 // idiagnz[i]=max(idiagnz[i],10); 193 // iodiagnz[i]=2*iodiagnz[i]; // estimate a bit higher for off-part interactions 194 // idiagnz[i]=2*idiagnz[i]; // estimate a bit higher for off-part interactions 195 } 196 // the above was pretty good but below is faster and not too much more memory...of course once you do this 197 // could just use the constant fill parameters in create but keep it alive for potential later optimization 198 199 for(i=0;i<iownnodes;i++) { 200 iodiagnz[i]=1.3*maxd; 201 idiagnz[i]=1.3*maxd; 202 } 203 204 205 206 if(workfc.numpe < 200){ 207 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 208 printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo); 209 } 210 // Print debug info 211 if(nshgt < 200){ 212 int irank; 213 for(irank=0;irank<workfc.numpe;irank++) { 214 if(irank == workfc.myrank){ 215 printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank); 216 for(i=0;i<iownnodes;i++) { 217 printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]); 218 } 219 } 220 MPI_Barrier(MPI_COMM_WORLD); 221 } 222 } 223 // } 224 // if(firstpetsccall == 1) { 225 226 petsc_bs = (PetscInt) nflow; 227 petsc_m = (PetscInt) nflow* (PetscInt) iownnodes; 228 petsc_M = (PetscInt) nshgt * (PetscInt) nflow; 229 petsc_PA = (PetscInt) 40; 230 // TODO: fixe nshgt for 64 bit integer!!!!! 231 232 //ierr = MatCreateBAIJ(PETSC_COMM_WORLD, nflow, nflow*iownnodes, 233 // nflow*iownnodes, nshgt*nflow, nshgt*nflow, 0, 234 235 // this has no pre-allocate ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 236 // this has no pre-allocate 0, NULL, 0, NULL, &lhsP); 237 238 // next 2 do pre-allocate 239 240 // MPI_Barrier(MPI_COMM_WORLD); 241 // if(workfc.myrank ==0) printf("Before matCreateBAIJ petsc_bs,petsc_m,petsc_M,petsc_PA %ld %ld %ld %ld \n",petsc_bs,petsc_m,petsc_M,petsc_PA); 242 243 ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 244 0, idiagnz, 0, iodiagnz, &lhsP); 245 246 // petsc_PA, NULL, petsc_PA, NULL, &lhsP); 247 // MPI_Barrier(MPI_COMM_WORLD); 248 // if(workfc.myrank ==0) printf("Before matSetOoption 1 \n"); 249 ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 250 251 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 252 #ifdef HIDEJEDBROWN 253 ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 254 #endif 255 ierr = MatSetUp(lhsP); 256 257 PetscInt myMatStart, myMatEnd; 258 ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 259 //debug 260 if(workfc.myrank == rankdump) printf("Flow myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 261 } 262 // MPI_Barrier(MPI_COMM_WORLD); 263 // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 264 if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP); 265 266 get_time(duration, (duration+1)); 267 268 // MPI_Barrier(MPI_COMM_WORLD); 269 // if(workfc.myrank ==0) printf("Before elmgmr \n"); 270 // for (i=0; i<nshg ; i++) { 271 // assert(fncorp[i]>0); 272 // } 273 274 ElmGMRPETSc(y, ac, x, 275 shp, shgl, iBC, 276 BC, shpb, 277 shglb, res, 278 rmes, iper, 279 ilwork, rerr , &lhsP); 280 // Below was just to confirm that what is in rstat is the res or b vector given to PETSc 281 // rstat (res, ilwork); 282 // if(workfc.myrank ==0) printf("residual after ElmGMRPETSc\n"); 283 get_time((duration+2), (duration+3)); 284 get_max_time_diff((duration), (duration+2), 285 (duration+1), (duration+3), 286 "ElmGMRPETSc \0"); // char(0)) 287 // printf("after ElmGMRPETSc\n"); 288 if(firstpetsccall == 1) { 289 // Setup IndexSet. For now, we mimic vector insertion procedure 290 // Since we always reference by global indexes this doesn't matter 291 // except for cache performance) 292 // TODO: Better arrangment? 293 //PetscInt indexsetary[nflow*nshg]; 294 PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg); 295 PetscInt* gblindexsetary = malloc(sizeof(PetscInt)*nflow*nshg); 296 PetscInt nodetoinsert; 297 nodetoinsert = 0; 298 k=0; 299 if(workfc.myrank == rankdump) { 300 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 301 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 302 } 303 if(workfc.numpe > 1) { 304 for (i=0; i<nshg ; i++) { 305 nodetoinsert = fncorp[i]-1; 306 307 //debug 308 if(workfc.myrank == rankdump) { 309 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 310 } 311 312 // if(nodetoinsert<0) printf("nodetoinsert <0! myrank: %d, value: %ld\n", workfc.myrank, nodetoinsert); 313 // assert(fncorp[i]>=0); 314 for (j=1; j<=nflow; j++) { 315 indexsetary[k] = nodetoinsert*nflow+(j-1); 316 assert(indexsetary[k]>=0); 317 // assert(fncorp[i]>=0); 318 k = k+1; 319 } 320 } 321 } 322 else { 323 for (i=0; i<nshg ; i++) { 324 nodetoinsert = i; 325 for (j=1; j<=nflow; j++) { 326 indexsetary[k] = nodetoinsert*nflow+(j-1); 327 k = k+1; 328 } 329 } 330 } 331 332 for (i=0; i<nshg*nflow; i++) { 333 gblindexsetary[i] = i ; //TODO: better option for performance? 334 //debug 335 if(workfc.myrank == rankdump) { 336 printf("myrank,i,glbindexsetary %d %d %d \n",workfc.myrank,i,gblindexsetary[i]); 337 } 338 } 339 // Create Vector Index Maps 340 petsc_n = (PetscInt) nshg * (PetscInt) nflow; 341 //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, indexsetary, 342 // PETSC_COPY_VALUES, &LocalIndexSet); 343 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary, 344 PETSC_COPY_VALUES, &LocalIndexSet); 345 // printf("after 1st ISCreateGeneral %d\n", ierr); 346 //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, gblindexsetary, 347 // PETSC_COPY_VALUES, &GlobalIndexSet); 348 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetary, 349 PETSC_COPY_VALUES, &GlobalIndexSet); 350 // printf("after 2nd ISCreateGeneral %d\n", ierr); 351 ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSet,&VectorMapping); 352 // printf("after 1st ISLocalToGlobalMappingCreateIS %d\n",ierr); 353 ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSet,&GblVectorMapping); 354 // printf("after 2nd ISLocalToGlobalMappingCreateIS %d\n",ierr); 355 free(indexsetary); 356 free(gblindexsetary); 357 } 358 if(genpar.lhs == 1) { 359 get_time((duration), (duration+1)); 360 ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY); 361 ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY); 362 get_time((duration+2),(duration+3)); 363 get_max_time_diff((duration), (duration+2), 364 (duration+1), (duration+3), 365 "MatAssembly \0"); // char(0)) 366 get_time(duration, (duration+1)); 367 } 368 if(firstpetsccall == 1) { 369 ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol); 370 // TODO Should this be LocalRow? LocalCol? or does LocalRow always == LocalCol 371 //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resP); 372 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP); 373 //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &DyP); 374 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP); 375 } 376 ierr = VecZeroEntries(resP); 377 ierr = VecZeroEntries(DyP); 378 if(firstpetsccall == 1) { 379 //ierr = VecCreateSeq(PETSC_COMM_SELF, nshg*nflow, &DyPLocal); 380 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal); 381 } 382 // call VecZeroEntries(DyPLocal, ierr) 383 384 PetscRow=0; 385 k = 0; 386 int index; 387 for (i=0; i<nshg; i++ ){ 388 for (j = 1; j<=nflow; j++){ 389 index = i + (j-1)*nshg; 390 valtoinsert = res[index]; 391 if(workfc.numpe > 1) { 392 PetscRow = (fncorp[i]-1)*nflow+(j-1); 393 } 394 else { 395 PetscRow = i*nflow+(j-1); 396 } 397 assert(fncorp[i]<=nshgt); 398 assert(fncorp[i]>0); 399 assert(PetscRow>=0); 400 assert(PetscRow<=nshgt*nflow); 401 ierr = VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES); 402 } 403 } 404 // printf("after VecSetValue loop %d\n",ierr); 405 ierr = VecAssemblyBegin(resP); 406 ierr = VecAssemblyEnd(resP); 407 get_time((duration+2), (duration+3)); 408 get_max_time_diff((duration), (duration+2), 409 (duration+1), (duration+3), 410 "VectorWorkPre \0"); // char(0)) 411 412 get_time((duration),(duration+1)); 413 414 if(firstpetsccall == 1) { 415 ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); 416 ierr = KSPSetOperators(ksp, lhsP, lhsP); 417 //3.4 ierr = KSPSetOperators(ksp, lhsP, lhsP, SAME_NONZERO_PATTERN); 418 ierr = KSPGetPC(ksp, &pc); 419 ierr = PCSetType(pc, PCPBJACOBI); 420 PetscInt maxits; 421 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 422 //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 423 ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 424 ierr = KSPSetFromOptions(ksp); 425 } 426 ierr = KSPSolve(ksp, resP, DyP); 427 get_time((duration+2),(duration+3)); 428 get_max_time_diff((duration), (duration+2), 429 (duration+1), (duration+3), 430 "KSPSolve \0"); // char(0)) 431 //if(myrank .eq. 0) then 432 //!write(*,"(A, E10.3)") "KSPSolve ", elapsed 433 //end if 434 get_time((duration),(duration+1)); 435 if(firstpetsccall == 1) { 436 ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, GlobalIndexSet, &scatter7); 437 } 438 ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 439 ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 440 ierr = VecGetArray(DyPLocal, &parray); 441 PetscRow = 0; 442 for ( node = 0; node< nshg; node++) { 443 for (var = 1; var<= nflow; var++) { 444 index = node + (var-1)*nshg; 445 Dy[index] = parray[PetscRow]; 446 PetscRow = PetscRow+1; 447 } 448 } 449 ierr = VecRestoreArray(DyPLocal, &parray); 450 451 firstpetsccall = 0; 452 // later timer ('Solver '); 453 454 455 // .... output the statistics 456 // 457 itrpar.iKs=0; // see rstat() 458 PetscInt its; 459 //ierr = KSPGetIterationNumber(ksp, &itrpar.iKs); 460 ierr = KSPGetIterationNumber(ksp, &its); 461 itrpar.iKs = (int) its; 462 get_time((duration+2),(duration+3)); 463 get_max_time_diff((duration), (duration+2), 464 (duration+1), (duration+3), 465 "solWork \0"); // char(0)) 466 itrpar.ntotGM += (int) its; 467 rstat (res, ilwork); 468 // 469 // .... stop the timer 470 // 471 // 3002 continue ! no solve just res. 472 // later call timer ('Back ') 473 // 474 // .... end 475 // 476 } 477 478 void SolGMRpSclr(double* y, double* ac, 479 double* x, double* elDw, int* iBC, double* BC, 480 int* col, int* row, 481 int* iper, 482 int* ilwork, double* shp, double* shgl, double* shpb, 483 double* shglb, double* res, double* Dy, long long int* fncorp) 484 { 485 // 486 // ---------------------------------------------------------------------- 487 // 488 // This is the preconditioned GMRES driver routine. 489 // 490 // input: 491 // y (nshg,ndof) : Y-variables at n+alpha_v 492 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 493 // yold (nshg,ndof) : Y-variables at beginning of step 494 // acold (nshg,ndof) : Primvar. accel. variable at begng step 495 // x (numnp,nsd) : node coordinates 496 // iBC (nshg) : BC codes 497 // BC (nshg,ndofBC) : BC constraint parameters 498 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 499 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 500 // 501 // ---------------------------------------------------------------------- 502 // 503 504 505 // Get variables from common_c.h 506 int nshg, nflow, nsd, iownnodes; 507 nshg = conpar.nshg; 508 nsd = NSD; 509 iownnodes = conpar.iownnodes; 510 511 gcorp_t nshgt; 512 gcorp_t mbeg; 513 gcorp_t mend; 514 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 515 mbeg = (gcorp_t) newdim.minowned; 516 mend = (gcorp_t) newdim.maxowned; 517 518 519 int node, element, var, eqn; 520 double valtoinsert; 521 int nenl, iel, lelCat, lcsyst, iorder, nshl; 522 int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 523 double DyFlats[nshg]; 524 double DyFlatPhastas[nshg]; 525 double rmes[nshg]; 526 // DEBUG 527 int i,j,k,l,m; 528 529 double real_rtol, real_abstol, real_dtol; 530 double *parray; 531 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 532 PetscInt petsc_n; 533 PetscOne = 1; 534 uint64_t duration[4]; 535 536 537 // 538 // 539 // .... *******************>> Element Data Formation <<****************** 540 // 541 // 542 // .... set the parameters for flux and surface tension calculations 543 // 544 // 545 genpar.idflx = 0 ; 546 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 547 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 548 549 550 551 gcorp_t glbNZ; 552 553 if(firstpetsccalls == 1) { 554 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 555 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 556 for(i=0;i<iownnodes;i++) { 557 idiagnz[i]=0; 558 iodiagnz[i]=0; 559 } 560 i=0; 561 for(k=0;k<nshg;k++) { 562 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 563 // this node is not owned by this rank so we skip 564 } else { 565 for(j=col[i]-1;j<col[i+1]-1;j++) { 566 glbNZ=fncorp[row[j]-1]; 567 if((glbNZ < mbeg) || (glbNZ > mend)) { 568 iodiagnz[i]++; 569 } else { 570 idiagnz[i]++; 571 } 572 } 573 i++; 574 } 575 } 576 gcorp_t mind=1000; 577 gcorp_t mino=1000; 578 gcorp_t maxd=0; 579 gcorp_t maxo=0; 580 for(i=0;i<iownnodes;i++) { 581 mind=min(mind,idiagnz[i]); 582 mino=min(mino,iodiagnz[i]); 583 maxd=max(maxd,idiagnz[i]); 584 maxo=max(maxo,iodiagnz[i]); 585 } 586 587 for(i=0;i<iownnodes;i++) { 588 iodiagnz[i]=1.3*maxd; 589 idiagnz[i]=1.3*maxd; 590 } 591 592 593 594 // } 595 // if(firstpetsccalls == 1) { 596 597 petsc_m = (PetscInt) iownnodes; 598 petsc_M = (PetscInt) nshgt; 599 petsc_PA = (PetscInt) 40; 600 601 ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M, 602 0, idiagnz, 0, iodiagnz, &lhsPs); 603 604 ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 605 606 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 607 #ifdef HIDEJEDBROWN 608 ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 609 #endif 610 ierr = MatSetUp(lhsPs); 611 612 PetscInt myMatStart, myMatEnd; 613 ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd); 614 if(workfc.myrank ==rankdump) printf("Sclr myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 615 } 616 // MPI_Barrier(MPI_COMM_WORLD); 617 // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 618 if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs); 619 620 get_time(duration, (duration+1)); 621 622 // MPI_Barrier(MPI_COMM_WORLD); 623 // if(workfc.myrank ==0) printf("Before elmgmr \n"); 624 // for (i=0; i<nshg ; i++) { 625 // assert(fncorp[i]>0); 626 // } 627 628 ElmGMRPETScSclr(y, ac, x, elDw, 629 shp, shgl, iBC, 630 BC, shpb, 631 shglb, res, 632 rmes, iper, 633 ilwork, &lhsPs); 634 if(firstpetsccalls == 1) { 635 // Setup IndexSet. For now, we mimic vector insertion procedure 636 // Since we always reference by global indexes this doesn't matter 637 // except for cache performance) 638 // TODO: Better arrangment? 639 640 PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg); 641 PetscInt* gblindexsetarys = malloc(sizeof(PetscInt)*nshg); 642 PetscInt nodetoinsert; 643 644 nodetoinsert = 0; 645 k=0; 646 647 //debug 648 if(workfc.myrank == rankdump) { 649 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 650 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 651 } 652 653 if(workfc.numpe > 1) { 654 for (i=0; i<nshg ; i++) { 655 nodetoinsert = fncorp[i]-1; 656 //debug 657 if(workfc.myrank == rankdump) { 658 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 659 } 660 661 indexsetarys[k] = nodetoinsert; 662 k = k+1; 663 } 664 } 665 else { 666 for (i=0; i<nshg ; i++) { 667 nodetoinsert = i; 668 indexsetarys[k] = nodetoinsert; 669 k = k+1; 670 } 671 } 672 673 for (i=0; i<nshg; i++) { 674 gblindexsetarys[i] = i ; //TODO: better option for performance? 675 //debug 676 if(workfc.myrank == rankdump) { 677 printf("myrank,i,glbindexsetarys %d %d %d \n",workfc.myrank,i,gblindexsetarys[i]); 678 } 679 } 680 // Create Vector Index Maps 681 petsc_n = (PetscInt) nshg; 682 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys, 683 PETSC_COPY_VALUES, &LocalIndexSets); 684 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetarys, 685 PETSC_COPY_VALUES, &GlobalIndexSets); 686 ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSets,&VectorMappings); 687 ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSets,&GblVectorMappings); 688 free(indexsetarys); 689 free(gblindexsetarys); 690 } 691 if(genpar.lhs ==1) { 692 ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY); 693 ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY); 694 } 695 if(firstpetsccalls == 1) { 696 ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol); 697 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs); 698 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs); 699 } 700 ierr = VecZeroEntries(resPs); 701 ierr = VecZeroEntries(DyPs); 702 if(firstpetsccalls == 1) { 703 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals); 704 } 705 706 PetscRow=0; 707 k = 0; 708 int index; 709 for (i=0; i<nshg; i++ ){ 710 valtoinsert = res[i]; 711 if(workfc.numpe > 1) { 712 PetscRow = (fncorp[i]-1); 713 } 714 else { 715 PetscRow = i-1; 716 } 717 assert(fncorp[i]<=nshgt); 718 assert(fncorp[i]>0); 719 assert(PetscRow>=0); 720 assert(PetscRow<=nshgt); 721 ierr = VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES); 722 } 723 // printf("after VecSetValue loop %d\n",ierr); 724 ierr = VecAssemblyBegin(resPs); 725 ierr = VecAssemblyEnd(resPs); 726 727 if(firstpetsccalls == 1) { 728 ierr = KSPCreate(PETSC_COMM_WORLD, &ksps); 729 ierr = KSPSetOperators(ksps, lhsPs, lhsPs); 730 //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN); 731 ierr = KSPGetPC(ksps, &pcs); 732 ierr = PCSetType(pcs, PCPBJACOBI); 733 PetscInt maxits; 734 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 735 //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 736 ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 737 ierr = KSPSetFromOptions(ksps); 738 } 739 ierr = KSPSolve(ksps, resPs, DyPs); 740 if(firstpetsccalls == 1) { 741 ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, GlobalIndexSets, &scatter7s); 742 } 743 ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 744 ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 745 ierr = VecGetArray(DyPLocals, &parray); 746 PetscRow = 0; 747 for ( node = 0; node< nshg; node++) { 748 index = node; 749 Dy[index] = parray[PetscRow]; 750 PetscRow = PetscRow+1; 751 } 752 ierr = VecRestoreArray(DyPLocals, &parray); 753 754 firstpetsccalls = 0; 755 756 // .... output the statistics 757 // 758 // itrpar.iKss=0; // see rstat() 759 PetscInt its; 760 ierr = KSPGetIterationNumber(ksps, &its); 761 itrpar.iKss = (int) its; 762 itrpar.ntotGMs += (int) its; 763 rstatSclr (res, ilwork); 764 // 765 // .... end 766 // 767 } 768 #endif 769