1 #ifdef HAVE_PETSC 2 #include <stdio.h> 3 4 #include "petscsys.h" 5 #include "petscvec.h" 6 #include "petscmat.h" 7 #include "petscpc.h" 8 #include "petscksp.h" 9 10 #include "assert.h" 11 #include "common_c.h" 12 13 #include <FCMangle.h> 14 #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP) 15 #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR) 16 #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC) 17 #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR) 18 #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT) 19 #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR) 20 #define min(a,b) (((a) < (b)) ? (a) : (b)) 21 #define max(a,b) (((a) > (b)) ? (a) : (b)) 22 #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME) 23 #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF) 24 25 typedef long long int gcorp_t; 26 27 void get_time(uint64_t* rv, uint64_t* cycle); 28 void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl); 29 30 //#include "auxmpi.h" 31 // static PetscOffset poff; 32 33 static Mat lhsP; 34 static PC pc; 35 static KSP ksp; 36 static Vec DyP, resP, DyPLocal, resPGlobal; 37 static PetscErrorCode ierr; 38 static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol; 39 static IS LocalIndexSet, GlobalIndexSet; 40 static ISLocalToGlobalMapping VectorMapping; 41 static ISLocalToGlobalMapping GblVectorMapping; 42 static VecScatter scatter7; 43 static int firstpetsccall = 1; 44 45 static Mat lhsPs; 46 static PC pcs; 47 static KSP ksps; 48 static Vec DyPs, resPs, DyPLocals, resPGlobals; 49 static IS LocalIndexSets, GlobalIndexSets; 50 static ISLocalToGlobalMapping VectorMappings; 51 static ISLocalToGlobalMapping GblVectorMappings; 52 static VecScatter scatter7s; 53 static int firstpetsccalls = 1; 54 55 static int rankdump=-1; // 8121 was the problem rank with 3.5.3 56 57 58 void SolGMRp(double* y, double* ac, double* yold, 59 double* x, int* iBC, double* BC, 60 int* col, int* row, double* lhsk, 61 double* res, double* BDiag, 62 int* iper, 63 int* ilwork, double* shp, double* shgl, double* shpb, 64 double* shglb, double* Dy, double* rerr, long long int* fncorp) 65 { 66 // 67 // ---------------------------------------------------------------------- 68 // 69 // This is the preconditioned GMRES driver routine. 70 // 71 // input: 72 // y (nshg,ndof) : Y-variables at n+alpha_v 73 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 74 // yold (nshg,ndof) : Y-variables at beginning of step 75 // acold (nshg,ndof) : Primvar. accel. variable at begng step 76 // x (numnp,nsd) : node coordinates 77 // iBC (nshg) : BC codes 78 // BC (nshg,ndofBC) : BC constraint parameters 79 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 80 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 81 // 82 // output: 83 // res (nshg,nflow) : preconditioned residual 84 // BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 85 // 86 // 87 // Zdenek Johan, Winter 1991. (Fortran 90) 88 // ---------------------------------------------------------------------- 89 // 90 91 92 // Get variables from common_c.h 93 int nshg, nflow, nsd, iownnodes; 94 nshg = conpar.nshg; 95 nflow = conpar.nflow; 96 nsd = NSD; 97 iownnodes = conpar.iownnodes; 98 99 gcorp_t nshgt; 100 gcorp_t mbeg; 101 gcorp_t mend; 102 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 103 mbeg = (gcorp_t) newdim.minowned; 104 mend = (gcorp_t) newdim.maxowned; 105 106 107 int node, element, var, eqn; 108 double valtoinsert; 109 int nenl, iel, lelCat, lcsyst, iorder, nshl; 110 int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 111 double DyFlat[nshg*nflow]; 112 double DyFlatPhasta[nshg*nflow]; 113 double rmes[nshg*nflow]; 114 // DEBUG 115 int i,j,k,l,m; 116 117 // FIXME: PetscScalar 118 double real_rtol, real_abstol, real_dtol; 119 // /DEBUG 120 //double parray[1]; // Should be a PetscScalar 121 double *parray; // Should be a PetscScalar 122 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 123 PetscInt petsc_n; 124 PetscOne = 1; 125 uint64_t duration[4]; 126 127 // printf("%g %g %g \n", *rerr, *(rerr+1) , *(rerr+2)); 128 129 if(firstpetsccall == 1) { 130 /* call get_time(duration(1),duration(2)); 131 call system('sleep 10'); 132 call get_time(duration(3),duration(4)); 133 call get_max_time_diff(duration(1), duration(3), 134 duration(2), duration(4), 135 "TenSecondSleep " // char(0)) 136 if(myrank .eq. 0) { 137 !write(*,"(A, E10.3)") "TenSecondSleep ", elapsed 138 } 139 */ 140 } 141 // 142 // 143 // .... *******************>> Element Data Formation <<****************** 144 // 145 // 146 // .... set the parameters for flux and surface tension calculations 147 // 148 // 149 genpar.idflx = 0 ; 150 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 151 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 152 153 154 155 gcorp_t glbNZ; 156 157 if(firstpetsccall == 1) { 158 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 159 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 160 for(i=0;i<iownnodes;i++) { 161 idiagnz[i]=0; 162 iodiagnz[i]=0; 163 } 164 i=0; 165 for(k=0;k<nshg;k++) { 166 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 167 // this node is not owned by this rank so we skip 168 } else { 169 for(j=col[i]-1;j<col[i+1]-1;j++) { 170 // assert(row[j]<=nshg); 171 // assert(fncorp[row[j]-1]<=nshgt); 172 glbNZ=fncorp[row[j]-1]; 173 if((glbNZ < mbeg) || (glbNZ > mend)) { 174 iodiagnz[i]++; 175 } else { 176 idiagnz[i]++; 177 } 178 } 179 i++; 180 } 181 } 182 gcorp_t mind=1000; 183 gcorp_t mino=1000; 184 gcorp_t maxd=0; 185 gcorp_t maxo=0; 186 for(i=0;i<iownnodes;i++) { 187 mind=min(mind,idiagnz[i]); 188 mino=min(mino,iodiagnz[i]); 189 maxd=max(maxd,idiagnz[i]); 190 maxo=max(maxo,iodiagnz[i]); 191 // iodiagnz[i]=max(iodiagnz[i],10); 192 // idiagnz[i]=max(idiagnz[i],10); 193 // iodiagnz[i]=2*iodiagnz[i]; // estimate a bit higher for off-part interactions 194 // idiagnz[i]=2*idiagnz[i]; // estimate a bit higher for off-part interactions 195 } 196 // the above was pretty good but below is faster and not too much more memory...of course once you do this 197 // could just use the constant fill parameters in create but keep it alive for potential later optimization 198 199 for(i=0;i<iownnodes;i++) { 200 iodiagnz[i]=1.3*maxd; 201 idiagnz[i]=1.3*maxd; 202 } 203 204 205 206 if(workfc.numpe < 200){ 207 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 208 printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo); 209 } 210 // Print debug info 211 if(nshgt < 200){ 212 int irank; 213 for(irank=0;irank<workfc.numpe;irank++) { 214 if(irank == workfc.myrank){ 215 printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank); 216 for(i=0;i<iownnodes;i++) { 217 printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]); 218 } 219 } 220 MPI_Barrier(MPI_COMM_WORLD); 221 } 222 } 223 // } 224 // if(firstpetsccall == 1) { 225 226 petsc_bs = (PetscInt) nflow; 227 petsc_m = (PetscInt) nflow* (PetscInt) iownnodes; 228 petsc_M = (PetscInt) nshgt * (PetscInt) nflow; 229 petsc_PA = (PetscInt) 40; 230 // TODO: fixe nshgt for 64 bit integer!!!!! 231 232 //ierr = MatCreateBAIJ(PETSC_COMM_WORLD, nflow, nflow*iownnodes, 233 // nflow*iownnodes, nshgt*nflow, nshgt*nflow, 0, 234 235 // this has no pre-allocate ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 236 // this has no pre-allocate 0, NULL, 0, NULL, &lhsP); 237 238 // next 2 do pre-allocate 239 240 // MPI_Barrier(MPI_COMM_WORLD); 241 // if(workfc.myrank ==0) printf("Before matCreateBAIJ petsc_bs,petsc_m,petsc_M,petsc_PA %ld %ld %ld %ld \n",petsc_bs,petsc_m,petsc_M,petsc_PA); 242 243 ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 244 0, idiagnz, 0, iodiagnz, &lhsP); 245 246 // petsc_PA, NULL, petsc_PA, NULL, &lhsP); 247 // MPI_Barrier(MPI_COMM_WORLD); 248 // if(workfc.myrank ==0) printf("Before matSetOoption 1 \n"); 249 ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 250 251 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 252 #ifdef HIDEJEDBROWN 253 ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 254 #endif 255 ierr = MatSetUp(lhsP); 256 257 PetscInt myMatStart, myMatEnd; 258 ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 259 //debug 260 if(workfc.myrank == rankdump) printf("Flow myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 261 } 262 // MPI_Barrier(MPI_COMM_WORLD); 263 // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 264 if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP); 265 266 get_time(duration, (duration+1)); 267 268 // MPI_Barrier(MPI_COMM_WORLD); 269 // if(workfc.myrank ==0) printf("Before elmgmr \n"); 270 // for (i=0; i<nshg ; i++) { 271 // assert(fncorp[i]>0); 272 // } 273 274 ElmGMRPETSc(y, ac, x, 275 shp, shgl, iBC, 276 BC, shpb, 277 shglb, res, 278 rmes, iper, 279 ilwork, rerr , &lhsP); 280 get_time((duration+2), (duration+3)); 281 get_max_time_diff((duration), (duration+2), 282 (duration+1), (duration+3), 283 "ElmGMRPETSc \0"); // char(0)) 284 // printf("after ElmGMRPETSc\n"); 285 if(firstpetsccall == 1) { 286 // Setup IndexSet. For now, we mimic vector insertion procedure 287 // Since we always reference by global indexes this doesn't matter 288 // except for cache performance) 289 // TODO: Better arrangment? 290 //PetscInt indexsetary[nflow*nshg]; 291 PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg); 292 PetscInt* gblindexsetary = malloc(sizeof(PetscInt)*nflow*nshg); 293 PetscInt nodetoinsert; 294 nodetoinsert = 0; 295 k=0; 296 if(workfc.myrank == rankdump) { 297 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 298 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 299 } 300 if(workfc.numpe > 1) { 301 for (i=0; i<nshg ; i++) { 302 nodetoinsert = fncorp[i]-1; 303 304 //debug 305 if(workfc.myrank == rankdump) { 306 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 307 } 308 309 // if(nodetoinsert<0) printf("nodetoinsert <0! myrank: %d, value: %ld\n", workfc.myrank, nodetoinsert); 310 // assert(fncorp[i]>=0); 311 for (j=1; j<=nflow; j++) { 312 indexsetary[k] = nodetoinsert*nflow+(j-1); 313 assert(indexsetary[k]>=0); 314 // assert(fncorp[i]>=0); 315 k = k+1; 316 } 317 } 318 } 319 else { 320 for (i=0; i<nshg ; i++) { 321 nodetoinsert = i; 322 for (j=1; j<=nflow; j++) { 323 indexsetary[k] = nodetoinsert*nflow+(j-1); 324 k = k+1; 325 } 326 } 327 } 328 329 for (i=0; i<nshg*nflow; i++) { 330 gblindexsetary[i] = i ; //TODO: better option for performance? 331 //debug 332 if(workfc.myrank == rankdump) { 333 printf("myrank,i,glbindexsetary %d %d %d \n",workfc.myrank,i,gblindexsetary[i]); 334 } 335 } 336 // Create Vector Index Maps 337 petsc_n = (PetscInt) nshg * (PetscInt) nflow; 338 //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, indexsetary, 339 // PETSC_COPY_VALUES, &LocalIndexSet); 340 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary, 341 PETSC_COPY_VALUES, &LocalIndexSet); 342 // printf("after 1st ISCreateGeneral %d\n", ierr); 343 //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, gblindexsetary, 344 // PETSC_COPY_VALUES, &GlobalIndexSet); 345 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetary, 346 PETSC_COPY_VALUES, &GlobalIndexSet); 347 // printf("after 2nd ISCreateGeneral %d\n", ierr); 348 ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSet,&VectorMapping); 349 // printf("after 1st ISLocalToGlobalMappingCreateIS %d\n",ierr); 350 ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSet,&GblVectorMapping); 351 // printf("after 2nd ISLocalToGlobalMappingCreateIS %d\n",ierr); 352 free(indexsetary); 353 free(gblindexsetary); 354 } 355 if(genpar.lhs == 1) { 356 get_time((duration), (duration+1)); 357 ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY); 358 ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY); 359 get_time((duration+2),(duration+3)); 360 get_max_time_diff((duration), (duration+2), 361 (duration+1), (duration+3), 362 "MatAssembly \0"); // char(0)) 363 get_time(duration, (duration+1)); 364 } 365 if(firstpetsccall == 1) { 366 ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol); 367 // TODO Should this be LocalRow? LocalCol? or does LocalRow always == LocalCol 368 //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resP); 369 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP); 370 //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &DyP); 371 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP); 372 } 373 ierr = VecZeroEntries(resP); 374 ierr = VecZeroEntries(DyP); 375 if(firstpetsccall == 1) { 376 //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resPGlobal); 377 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobal); 378 //ierr = VecCreateSeq(PETSC_COMM_SELF, nshg*nflow, &DyPLocal); 379 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal); 380 } 381 ierr = VecZeroEntries(resPGlobal); 382 // call VecZeroEntries(DyPLocal, ierr) 383 384 PetscRow=0; 385 k = 0; 386 int index; 387 for (i=0; i<nshg; i++ ){ 388 for (j = 1; j<=nflow; j++){ 389 index = i + (j-1)*nshg; 390 valtoinsert = res[index]; 391 if(workfc.numpe > 1) { 392 PetscRow = (fncorp[i]-1)*nflow+(j-1); 393 } 394 else { 395 PetscRow = i*nflow+(j-1); 396 } 397 assert(fncorp[i]<=nshgt); 398 assert(fncorp[i]>0); 399 assert(PetscRow>=0); 400 assert(PetscRow<=nshgt*nflow); 401 ierr = VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES); 402 } 403 } 404 // printf("after VecSetValue loop %d\n",ierr); 405 ierr = VecAssemblyBegin(resP); 406 ierr = VecAssemblyEnd(resP); 407 get_time((duration+2), (duration+3)); 408 get_max_time_diff((duration), (duration+2), 409 (duration+1), (duration+3), 410 "VectorWorkPre \0"); // char(0)) 411 412 get_time((duration),(duration+1)); 413 414 if(firstpetsccall == 1) { 415 ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); 416 ierr = KSPSetOperators(ksp, lhsP, lhsP); 417 //3.4 ierr = KSPSetOperators(ksp, lhsP, lhsP, SAME_NONZERO_PATTERN); 418 ierr = KSPGetPC(ksp, &pc); 419 ierr = PCSetType(pc, PCPBJACOBI); 420 PetscInt maxits; 421 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 422 //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 423 ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 424 ierr = KSPSetFromOptions(ksp); 425 } 426 ierr = KSPSolve(ksp, resP, DyP); 427 get_time((duration+2),(duration+3)); 428 get_max_time_diff((duration), (duration+2), 429 (duration+1), (duration+3), 430 "KSPSolve \0"); // char(0)) 431 //if(myrank .eq. 0) then 432 //!write(*,"(A, E10.3)") "KSPSolve ", elapsed 433 //end if 434 get_time((duration),(duration+1)); 435 if(firstpetsccall == 1) { 436 ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, GlobalIndexSet, &scatter7); 437 } 438 ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 439 ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 440 ierr = VecGetArray(DyPLocal, &parray); 441 PetscRow = 0; 442 for ( node = 0; node< nshg; node++) { 443 for (var = 1; var<= nflow; var++) { 444 index = node + (var-1)*nshg; 445 Dy[index] = parray[PetscRow]; 446 PetscRow = PetscRow+1; 447 } 448 } 449 ierr = VecRestoreArray(DyPLocal, &parray); 450 451 firstpetsccall = 0; 452 // later timer ('Solver '); 453 454 455 // .... output the statistics 456 // 457 itrpar.iKs=0; // see rstat() 458 PetscInt its; 459 //ierr = KSPGetIterationNumber(ksp, &itrpar.iKs); 460 ierr = KSPGetIterationNumber(ksp, &its); 461 itrpar.iKs = (int) its; 462 get_time((duration+2),(duration+3)); 463 get_max_time_diff((duration), (duration+2), 464 (duration+1), (duration+3), 465 "solWork \0"); // char(0)) 466 itrpar.ntotGM += (int) its; 467 rstat (res, ilwork); 468 // 469 // .... stop the timer 470 // 471 // 3002 continue ! no solve just res. 472 // later call timer ('Back ') 473 // 474 // .... end 475 // 476 } 477 478 void SolGMRpSclr(double* y, double* ac, 479 double* x, double* elDw, int* iBC, double* BC, 480 int* col, int* row, 481 int* iper, 482 int* ilwork, double* shp, double* shgl, double* shpb, 483 double* shglb, double* res, double* Dy, long long int* fncorp) 484 { 485 // 486 // ---------------------------------------------------------------------- 487 // 488 // This is the preconditioned GMRES driver routine. 489 // 490 // input: 491 // y (nshg,ndof) : Y-variables at n+alpha_v 492 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 493 // yold (nshg,ndof) : Y-variables at beginning of step 494 // acold (nshg,ndof) : Primvar. accel. variable at begng step 495 // x (numnp,nsd) : node coordinates 496 // iBC (nshg) : BC codes 497 // BC (nshg,ndofBC) : BC constraint parameters 498 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 499 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 500 // 501 // ---------------------------------------------------------------------- 502 // 503 504 505 // Get variables from common_c.h 506 int nshg, nflow, nsd, iownnodes; 507 nshg = conpar.nshg; 508 nsd = NSD; 509 iownnodes = conpar.iownnodes; 510 511 gcorp_t nshgt; 512 gcorp_t mbeg; 513 gcorp_t mend; 514 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 515 mbeg = (gcorp_t) newdim.minowned; 516 mend = (gcorp_t) newdim.maxowned; 517 518 519 int node, element, var, eqn; 520 double valtoinsert; 521 int nenl, iel, lelCat, lcsyst, iorder, nshl; 522 int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 523 double DyFlats[nshg]; 524 double DyFlatPhastas[nshg]; 525 double rmes[nshg]; 526 // DEBUG 527 int i,j,k,l,m; 528 529 double real_rtol, real_abstol, real_dtol; 530 double *parray; 531 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 532 PetscInt petsc_n; 533 PetscOne = 1; 534 uint64_t duration[4]; 535 536 537 // 538 // 539 // .... *******************>> Element Data Formation <<****************** 540 // 541 // 542 // .... set the parameters for flux and surface tension calculations 543 // 544 // 545 genpar.idflx = 0 ; 546 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 547 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 548 549 550 551 gcorp_t glbNZ; 552 553 if(firstpetsccalls == 1) { 554 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 555 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 556 for(i=0;i<iownnodes;i++) { 557 idiagnz[i]=0; 558 iodiagnz[i]=0; 559 } 560 i=0; 561 for(k=0;k<nshg;k++) { 562 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 563 // this node is not owned by this rank so we skip 564 } else { 565 for(j=col[i]-1;j<col[i+1]-1;j++) { 566 glbNZ=fncorp[row[j]-1]; 567 if((glbNZ < mbeg) || (glbNZ > mend)) { 568 iodiagnz[i]++; 569 } else { 570 idiagnz[i]++; 571 } 572 } 573 i++; 574 } 575 } 576 gcorp_t mind=1000; 577 gcorp_t mino=1000; 578 gcorp_t maxd=0; 579 gcorp_t maxo=0; 580 for(i=0;i<iownnodes;i++) { 581 mind=min(mind,idiagnz[i]); 582 mino=min(mino,iodiagnz[i]); 583 maxd=max(maxd,idiagnz[i]); 584 maxo=max(maxo,iodiagnz[i]); 585 } 586 587 for(i=0;i<iownnodes;i++) { 588 iodiagnz[i]=1.3*maxd; 589 idiagnz[i]=1.3*maxd; 590 } 591 592 593 594 // } 595 // if(firstpetsccalls == 1) { 596 597 petsc_m = (PetscInt) iownnodes; 598 petsc_M = (PetscInt) nshgt; 599 petsc_PA = (PetscInt) 40; 600 601 ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M, 602 0, idiagnz, 0, iodiagnz, &lhsPs); 603 604 ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 605 606 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 607 #ifdef HIDEJEDBROWN 608 ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 609 #endif 610 ierr = MatSetUp(lhsPs); 611 612 PetscInt myMatStart, myMatEnd; 613 ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd); 614 if(workfc.myrank ==rankdump) printf("Sclr myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 615 } 616 // MPI_Barrier(MPI_COMM_WORLD); 617 // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 618 if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs); 619 620 get_time(duration, (duration+1)); 621 622 // MPI_Barrier(MPI_COMM_WORLD); 623 // if(workfc.myrank ==0) printf("Before elmgmr \n"); 624 // for (i=0; i<nshg ; i++) { 625 // assert(fncorp[i]>0); 626 // } 627 628 ElmGMRPETScSclr(y, ac, x, elDw, 629 shp, shgl, iBC, 630 BC, shpb, 631 shglb, res, 632 rmes, iper, 633 ilwork, &lhsPs); 634 if(firstpetsccalls == 1) { 635 // Setup IndexSet. For now, we mimic vector insertion procedure 636 // Since we always reference by global indexes this doesn't matter 637 // except for cache performance) 638 // TODO: Better arrangment? 639 640 PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg); 641 PetscInt* gblindexsetarys = malloc(sizeof(PetscInt)*nshg); 642 PetscInt nodetoinsert; 643 644 nodetoinsert = 0; 645 k=0; 646 647 //debug 648 if(workfc.myrank == rankdump) { 649 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 650 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 651 } 652 653 if(workfc.numpe > 1) { 654 for (i=0; i<nshg ; i++) { 655 nodetoinsert = fncorp[i]-1; 656 //debug 657 if(workfc.myrank == rankdump) { 658 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 659 } 660 661 indexsetarys[k] = nodetoinsert; 662 k = k+1; 663 } 664 } 665 else { 666 for (i=0; i<nshg ; i++) { 667 nodetoinsert = i; 668 indexsetarys[k] = nodetoinsert; 669 k = k+1; 670 } 671 } 672 673 for (i=0; i<nshg; i++) { 674 gblindexsetarys[i] = i ; //TODO: better option for performance? 675 //debug 676 if(workfc.myrank == rankdump) { 677 printf("myrank,i,glbindexsetarys %d %d %d \n",workfc.myrank,i,gblindexsetarys[i]); 678 } 679 } 680 // Create Vector Index Maps 681 petsc_n = (PetscInt) nshg; 682 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys, 683 PETSC_COPY_VALUES, &LocalIndexSets); 684 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetarys, 685 PETSC_COPY_VALUES, &GlobalIndexSets); 686 ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSets,&VectorMappings); 687 ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSets,&GblVectorMappings); 688 free(indexsetarys); 689 free(gblindexsetarys); 690 } 691 if(genpar.lhs ==1) { 692 ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY); 693 ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY); 694 } 695 if(firstpetsccalls == 1) { 696 ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol); 697 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs); 698 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs); 699 } 700 ierr = VecZeroEntries(resPs); 701 ierr = VecZeroEntries(DyPs); 702 if(firstpetsccalls == 1) { 703 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobals); 704 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals); 705 } 706 ierr = VecZeroEntries(resPGlobals); 707 708 PetscRow=0; 709 k = 0; 710 int index; 711 for (i=0; i<nshg; i++ ){ 712 valtoinsert = res[i]; 713 if(workfc.numpe > 1) { 714 PetscRow = (fncorp[i]-1); 715 } 716 else { 717 PetscRow = i-1; 718 } 719 assert(fncorp[i]<=nshgt); 720 assert(fncorp[i]>0); 721 assert(PetscRow>=0); 722 assert(PetscRow<=nshgt); 723 ierr = VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES); 724 } 725 // printf("after VecSetValue loop %d\n",ierr); 726 ierr = VecAssemblyBegin(resPs); 727 ierr = VecAssemblyEnd(resPs); 728 729 if(firstpetsccalls == 1) { 730 ierr = KSPCreate(PETSC_COMM_WORLD, &ksps); 731 ierr = KSPSetOperators(ksps, lhsPs, lhsPs); 732 //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN); 733 ierr = KSPGetPC(ksps, &pcs); 734 ierr = PCSetType(pcs, PCPBJACOBI); 735 PetscInt maxits; 736 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 737 //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 738 ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 739 ierr = KSPSetFromOptions(ksps); 740 } 741 ierr = KSPSolve(ksps, resPs, DyPs); 742 if(firstpetsccalls == 1) { 743 ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, GlobalIndexSets, &scatter7s); 744 } 745 ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 746 ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 747 ierr = VecGetArray(DyPLocals, &parray); 748 PetscRow = 0; 749 for ( node = 0; node< nshg; node++) { 750 index = node; 751 Dy[index] = parray[PetscRow]; 752 PetscRow = PetscRow+1; 753 } 754 ierr = VecRestoreArray(DyPLocals, &parray); 755 756 firstpetsccalls = 0; 757 758 // .... output the statistics 759 // 760 // itrpar.iKss=0; // see rstat() 761 PetscInt its; 762 ierr = KSPGetIterationNumber(ksps, &its); 763 itrpar.iKss = (int) its; 764 itrpar.ntotGMs += (int) its; 765 rstatSclr (res, ilwork); 766 // 767 // .... end 768 // 769 } 770 #endif 771