#ifdef HAVE_PETSC
#include <stdio.h>
#include <stdlib.h> /* malloc/free (previously reached only indirectly) */
#include <stdint.h> /* uint64_t */

#include "petscsys.h"
#include "petscvec.h"
#include "petscmat.h"
#include "petscpc.h"
#include "petscksp.h"

#include "assert.h"
#include "common_c.h"

#include <FCMangle.h>
#define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP)
#define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR)
#define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC)
#define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR)
#define rstatp FortranCInterface_GLOBAL_(rstatp, RSTATP)
#define rstatpSclr FortranCInterface_GLOBAL_(rstatpsclr, RSTATPSCLR)
#define min(a,b) (((a) < (b)) ? (a) : (b))
#define max(a,b) (((a) > (b)) ? (a) : (b))
#define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME)
#define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF)

/* Global node-number type used by the Fortran side (gcorp/fncorp arrays). */
typedef long long int gcorp_t;

void get_time(uint64_t* rv, uint64_t* cycle);
void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl);

/*
 * PETSc objects are kept as file-scope statics so that the expensive setup
 * (matrix preallocation, index sets, scatters, KSP) is done once on the first
 * call and reused on every subsequent call.  The "...s" variants belong to
 * the scalar solve (SolGMRpSclr); the others to the flow solve (SolGMRp).
 */
static Mat lhsP;
static PC pc;
static KSP ksp;
static Vec DyP, resP, DyPLocal;
static PetscErrorCode ierr; /* NOTE(review): return codes are collected but never checked */
static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol;
static IS LocalIndexSet;
static ISLocalToGlobalMapping VectorMapping;
static VecScatter scatter7;
static int firstpetsccall = 1;
static PetscInt maxitsHist;
static PetscReal* resHist;

static Mat lhsPs;
static PC pcs;
static KSP ksps;
static Vec DyPs, resPs, DyPLocals;
static IS LocalIndexSets;
static ISLocalToGlobalMapping VectorMappings;
static VecScatter scatter7s;
static int firstpetsccalls = 1;

static int rankdump = -1; /* set to a rank id to dump its debug info (8121 was the problem rank with 3.5.3) */
PetscReal resNrm;

/*
 * ----------------------------------------------------------------------
 * Preconditioned GMRES driver for the flow equations (PETSc back end).
 *
 * input:
 *   y      (nshg,ndof)            : Y-variables at n+alpha_v
 *   ac     (nshg,ndof)            : Primvar. accel. variable n+alpha_m
 *   yold   (nshg,ndof)            : Y-variables at beginning of step
 *   x      (numnp,nsd)            : node coordinates
 *   iBC    (nshg)                 : BC codes
 *   BC     (nshg,ndofBC)          : BC constraint parameters
 *   shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
 *   shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
 *   fncorp (nshg)                 : local-to-global node numbers (1-based)
 *
 * output:
 *   res    (nshg,nflow)           : preconditioned residual
 *   Dy     (nshg,nflow)           : solution update, scattered back locally
 *
 * Zdenek Johan, Winter 1991.  (Fortran 90)
 * ----------------------------------------------------------------------
 */
void SolGMRp(double* y, double* ac, double* yold,
             double* x, int* iBC, double* BC,
             int* col, int* row, double* lhsk,
             double* res, double* BDiag,
             int* iper,
             int* ilwork, double* shp, double* shgl, double* shpb,
             double* shglb, double* Dy, double* rerr, long long int* fncorp)
{
  /* Problem sizes from the Fortran commons. */
  int nshg, nflow, nsd, iownnodes;
  nshg = conpar.nshg;
  nflow = conpar.nflow;
  nsd = NSD;
  iownnodes = conpar.iownnodes;

  gcorp_t nshgt; /* global number of shape-function coefficients */
  gcorp_t mbeg;  /* first global node owned by this rank (1-based, inclusive) */
  gcorp_t mend;  /* last global node owned by this rank (inclusive) */
  nshgt = (gcorp_t) newdim.nshgt; /* Fix nshgt in common_c.h */
  mbeg = (gcorp_t) newdim.minowned;
  mend = (gcorp_t) newdim.maxowned;

  int node, var;
  double valtoinsert; /* FIXME: should be PetscScalar */
  double rmes[nshg*nflow]; /* modified residual scratch for ElmGMRPETSc */
  int i, j, k;

  double *parray; /* FIXME: should be PetscScalar* */
  PetscInt petsc_bs, petsc_m, petsc_M;
  PetscInt petsc_n;
  PetscOne = 1;
  uint64_t duration[4]; /* {t_begin, cyc_begin, t_end, cyc_end} for timing blocks */

  gcorp_t glbNZ;

  if(firstpetsccall == 1) {
    /*
     * Estimate per-row nonzero counts so the first matrix fill does not
     * trigger repeated reallocation.  Everything in this conditional block
     * should eventually move to a helper function.
     */
    PetscInt* idiagnz = (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);  /* diagonal-block nnz per owned node */
    PetscInt* iodiagnz = (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); /* off-diagonal-block nnz per owned node */
    for(i=0;i<iownnodes;i++) {
      idiagnz[i]=0;
      iodiagnz[i]=0;
    }
    i=0;
    for(k=0;k<nshg;k++) {
      if((fncorp[k] < mbeg) || (fncorp[k] > mend)) {
        /* this node is not owned by this rank so we skip */
      } else {
        /* NOTE(review): col is indexed with the owned-node counter i rather
         * than the local node index k; this is correct only if owned nodes
         * form the leading block of the local numbering -- TODO confirm. */
        for(j=col[i]-1;j<col[i+1]-1;j++) {
          /* assert(row[j]<=nshg); */
          /* assert(fncorp[row[j]-1]<=nshgt); */
          glbNZ=fncorp[row[j]-1];
          if((glbNZ < mbeg) || (glbNZ > mend)) {
            iodiagnz[i]++;
          } else {
            idiagnz[i]++;
          }
        }
        i++;
      }
    }
    gcorp_t mind=1000;
    gcorp_t mino=1000;
    gcorp_t maxd=0;
    gcorp_t maxo=0;
    for(i=0;i<iownnodes;i++) {
      mind=min(mind,idiagnz[i]);
      mino=min(mino,iodiagnz[i]);
      maxd=max(maxd,idiagnz[i]);
      maxo=max(maxo,iodiagnz[i]);
    }
    /*
     * The exact counts above were pretty good, but a uniform over-estimate is
     * faster and not too much more memory.  Kept as arrays (instead of the
     * constant-fill arguments to MatCreateBAIJ) for later optimization.
     */
    for(i=0;i<iownnodes;i++) {
      iodiagnz[i]=1.3*maxd;
      idiagnz[i]=1.3*maxd;
    }

    if(workfc.numpe < 200){
      printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
      printf("myrank,mind,maxd,mino,maxo %d %lld %lld %lld %lld \n",workfc.myrank,
             (long long)mind,(long long)maxd,(long long)mino,(long long)maxo);
    }
    /* Print debug info (tiny problems only), one rank at a time. */
    if(nshgt < 200){
      int irank;
      for(irank=0;irank<workfc.numpe;irank++) {
        if(irank == workfc.myrank){
          printf("mbeg,mend,myrank %lld %lld %d \n",(long long)mbeg,(long long)mend,workfc.myrank);
          for(i=0;i<iownnodes;i++) {
            printf("%d %lld %lld \n",workfc.myrank,(long long)idiagnz[i],(long long)iodiagnz[i]);
          }
        }
        MPI_Barrier(MPI_COMM_WORLD);
      }
    }
    /* Block matrix: nflow x nflow blocks, one block row per owned node. */
    petsc_bs = (PetscInt) nflow;
    petsc_m = (PetscInt) nflow * (PetscInt) iownnodes;
    petsc_M = (PetscInt) nshgt * (PetscInt) nflow;
    ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M,
                         0, idiagnz, 0, iodiagnz, &lhsP);
    /* PETSc copies the preallocation arrays inside MatCreateBAIJ; release ours. */
    free(idiagnz);
    free(iodiagnz);

    ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);

    /* Jed Brown's improvement: imprint assembly so that stage scales after the first call. */
#ifdef JEDBROWN
    ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
#endif
    ierr = MatSetUp(lhsP);

    PetscInt myMatStart, myMatEnd;
    ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
    /* debug */
    if(workfc.myrank == rankdump)
      printf("Flow myrank,myMatStart,myMatEnd %d,%lld,%lld, \n",
             workfc.myrank,(long long)myMatStart,(long long)myMatEnd);
  }
  if(genpar.lhs == 1) ierr = MatZeroEntries(lhsP);

  /*
   * .... *******************>> Element Data Formation <<******************
   *
   * .... set the parameters for flux and surface tension calculations
   */
  genpar.idflx = 0;
  if(genpar.idiff >= 1) genpar.idflx = genpar.idflx + (nflow-1) * nsd;
  if (genpar.isurf == 1) genpar.idflx = genpar.idflx + nsd;

  get_time(duration, (duration+1));

  /* Element-level assembly: fills res/rmes and inserts into lhsP. */
  ElmGMRPETSc(y, ac, x,
              shp, shgl, iBC,
              BC, shpb,
              shglb, res,
              rmes, iper,
              ilwork, rerr, &lhsP);

  get_time((duration+2), (duration+3));
  get_max_time_diff((duration), (duration+2),
                    (duration+1), (duration+3),
                    "ElmGMRPETSc \0");
  if(firstpetsccall == 1) {
    /*
     * Setup IndexSet.  For now we mimic the vector insertion procedure;
     * since we always reference by global indexes this does not matter
     * except for cache performance.  TODO: better arrangement?
     */
    PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg);
    PetscInt nodetoinsert;
    nodetoinsert = 0;
    k=0;
    /* debug */
    if(workfc.myrank == rankdump) {
      printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
      printf("myrank,mbeg,mend %d %lld %lld \n",workfc.myrank,(long long)mbeg,(long long)mend);
    }
    if(workfc.numpe > 1) {
      for (i=0; i<nshg; i++) {
        nodetoinsert = fncorp[i]-1; /* global node, 0-based */
        /* debug */
        if(workfc.myrank == rankdump) {
          printf("myrank,i,nodetoinsert %d %d %lld \n",workfc.myrank,i,(long long)nodetoinsert);
        }
        for (j=1; j<=nflow; j++) {
          indexsetary[k] = nodetoinsert*nflow+(j-1);
          assert(indexsetary[k]>=0);
          k = k+1;
        }
      }
    }
    else {
      /* Serial run: local node index is the global index. */
      for (i=0; i<nshg; i++) {
        nodetoinsert = i;
        for (j=1; j<=nflow; j++) {
          indexsetary[k] = nodetoinsert*nflow+(j-1);
          k = k+1;
        }
      }
    }

    /* Create Vector Index Maps */
    petsc_n = (PetscInt) nshg * (PetscInt) nflow;
    ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary,
                           PETSC_COPY_VALUES, &LocalIndexSet);
    free(indexsetary);
  }
  if(genpar.lhs == 1) {
    get_time((duration), (duration+1));
    ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY);
    ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY);
    get_time((duration+2),(duration+3));
    get_max_time_diff((duration), (duration+2),
                      (duration+1), (duration+3),
                      "MatAssembly \0");
    get_time(duration, (duration+1));
  }
  if(firstpetsccall == 1) {
    ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol);
    ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP);
    ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP);
  }
  ierr = VecZeroEntries(resP);
  if(firstpetsccall == 1) {
    ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal);
  }

  /*
   * Scatter the Fortran-ordered residual res(nshg,nflow) into the global
   * PETSc vector.  ADD_VALUES accumulates contributions for shared nodes.
   */
  PetscRow=0;
  k = 0;
  int index;
  for (i=0; i<nshg; i++ ){
    for (j = 1; j<=nflow; j++){
      index = i + (j-1)*nshg; /* column-major (Fortran) layout */
      valtoinsert = res[index];
      if(workfc.numpe > 1) {
        PetscRow = (fncorp[i]-1)*nflow+(j-1);
      }
      else {
        PetscRow = i*nflow+(j-1);
      }
      assert(fncorp[i]<=nshgt);
      assert(fncorp[i]>0);
      assert(PetscRow>=0);
      assert(PetscRow<=nshgt*nflow);
      ierr = VecSetValue(resP, PetscRow, valtoinsert, ADD_VALUES);
    }
  }
  ierr = VecAssemblyBegin(resP);
  ierr = VecAssemblyEnd(resP);
  ierr = VecNorm(resP,NORM_2,&resNrm);
  get_time((duration+2), (duration+3));
  get_max_time_diff((duration), (duration+2),
                    (duration+1), (duration+3),
                    "VectorWorkPre \0");

  get_time((duration),(duration+1));

  if(firstpetsccall == 1) {
    /* One-time KSP setup: point-block Jacobi + GMRES restart budget. */
    ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
    ierr = KSPSetOperators(ksp, lhsP, lhsP);
    ierr = KSPGetPC(ksp, &pc);
    ierr = PCSetType(pc, PCPBJACOBI);
    PetscInt maxits;
    maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace;
    ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
    ierr = KSPSetFromOptions(ksp); /* runtime -ksp_* options override the above */
    maxitsHist=1000;
    resHist = (PetscReal*) malloc(sizeof(PetscReal)*maxitsHist);
    ierr = KSPSetResidualHistory(ksp,resHist,maxitsHist, PETSC_TRUE);
  }
  ierr = KSPSolve(ksp, resP, DyP);
  ierr = KSPGetResidualHistory(ksp,&resHist,&maxitsHist);
  PetscReal resNrmP=resHist[0]; /* initial (true) residual norm of this solve */
  get_time((duration+2),(duration+3));
  get_max_time_diff((duration), (duration+2),
                    (duration+1), (duration+3),
                    "KSPSolve \0");
  get_time((duration),(duration+1));
  if(firstpetsccall == 1) {
    ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, NULL, &scatter7);
  }
  /* Gather the distributed solution back into the local ordering ... */
  ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
  ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
  ierr = VecGetArray(DyPLocal, &parray);
  /* ... and copy into the Fortran-ordered output array Dy(nshg,nflow). */
  PetscRow = 0;
  for ( node = 0; node< nshg; node++) {
    for (var = 1; var<= nflow; var++) {
      index = node + (var-1)*nshg;
      Dy[index] = parray[PetscRow];
      PetscRow = PetscRow+1;
    }
  }
  ierr = VecRestoreArray(DyPLocal, &parray);

  firstpetsccall = 0;

  /* .... output the statistics */
  itrpar.iKs=0; /* see rstat() */
  PetscInt its;
  ierr = KSPGetIterationNumber(ksp, &its);
  itrpar.iKs = (int) its;
  /*
  PetscReal scale=1.0/sqrt(1.0*nshgt);
  if(workfc.myrank ==0) {
    printf("node resNrmP rosqrtNshgt\n");
    for ( node = 0; node<its; node++) {
      printf(" %d %f %f \n",node,resHist[node],scale*resHist[node]);
    }
  }
  */
  get_time((duration+2),(duration+3));
  get_max_time_diff((duration), (duration+2),
                    (duration+1), (duration+3),
                    "solWork \0");
  itrpar.ntotGM += (int) its;
  rstatp (&resNrm,&resNrmP);
  /* .... end */
}

/*
 * ----------------------------------------------------------------------
 * Preconditioned GMRES driver for a scalar equation (PETSc back end).
 * Same structure as SolGMRp, but one unknown per node (AIJ matrix instead
 * of blocked BAIJ) and a separate set of persistent PETSc objects ("...s").
 *
 * input:
 *   y      (nshg,ndof)            : Y-variables at n+alpha_v
 *   ac     (nshg,ndof)            : Primvar. accel. variable n+alpha_m
 *   x      (numnp,nsd)            : node coordinates
 *   iBC    (nshg)                 : BC codes
 *   BC     (nshg,ndofBC)          : BC constraint parameters
 *   shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
 *   shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
 * ----------------------------------------------------------------------
 */
void SolGMRpSclr(double* y, double* ac,
                 double* x, double* elDw, int* iBC, double* BC,
                 int* col, int* row,
                 int* iper,
                 int* ilwork, double* shp, double* shgl, double* shpb,
                 double* shglb, double* res, double* Dy, long long int* fncorp)
{
  /* Problem sizes from the Fortran commons. */
  int nshg, nflow, nsd, iownnodes;
  nshg = conpar.nshg;
  nflow = conpar.nflow; /* BUGFIX: was never assigned, but read below for idflx (UB) */
  nsd = NSD;
  iownnodes = conpar.iownnodes;

  gcorp_t nshgt;
  gcorp_t mbeg;
  gcorp_t mend;
  nshgt = (gcorp_t) newdim.nshgt; /* Fix nshgt in common_c.h */
  mbeg = (gcorp_t) newdim.minowned;
  mend = (gcorp_t) newdim.maxowned;

  int node;
  double valtoinsert; /* FIXME: should be PetscScalar */
  double rmes[nshg]; /* modified residual scratch for ElmGMRPETScSclr */
  int i, j, k;

  double *parray; /* FIXME: should be PetscScalar* */
  PetscInt petsc_m, petsc_M;
  PetscInt petsc_n;
  PetscOne = 1;
  uint64_t duration[4];

  /*
   * .... *******************>> Element Data Formation <<******************
   *
   * .... set the parameters for flux and surface tension calculations
   */
  genpar.idflx = 0;
  if(genpar.idiff >= 1) genpar.idflx = genpar.idflx + (nflow-1) * nsd;
  if (genpar.isurf == 1) genpar.idflx = genpar.idflx + nsd;

  gcorp_t glbNZ;

  if(firstpetsccalls == 1) {
    /* Per-row nonzero estimate; see the analogous block in SolGMRp. */
    PetscInt* idiagnz = (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
    PetscInt* iodiagnz = (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
    for(i=0;i<iownnodes;i++) {
      idiagnz[i]=0;
      iodiagnz[i]=0;
    }
    i=0;
    for(k=0;k<nshg;k++) {
      if((fncorp[k] < mbeg) || (fncorp[k] > mend)){
        /* this node is not owned by this rank so we skip */
      } else {
        /* NOTE(review): col indexed with owned counter i, not k -- see SolGMRp. */
        for(j=col[i]-1;j<col[i+1]-1;j++) {
          glbNZ=fncorp[row[j]-1];
          if((glbNZ < mbeg) || (glbNZ > mend)) {
            iodiagnz[i]++;
          } else {
            idiagnz[i]++;
          }
        }
        i++;
      }
    }
    gcorp_t mind=1000;
    gcorp_t mino=1000;
    gcorp_t maxd=0;
    gcorp_t maxo=0;
    for(i=0;i<iownnodes;i++) {
      mind=min(mind,idiagnz[i]);
      mino=min(mino,iodiagnz[i]);
      maxd=max(maxd,idiagnz[i]);
      maxo=max(maxo,iodiagnz[i]);
    }

    /* Uniform over-estimate (faster first fill; see SolGMRp). */
    for(i=0;i<iownnodes;i++) {
      iodiagnz[i]=1.3*maxd;
      idiagnz[i]=1.3*maxd;
    }

    /* Scalar system: one row per owned node. */
    petsc_m = (PetscInt) iownnodes;
    petsc_M = (PetscInt) nshgt;

    ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M,
                        0, idiagnz, 0, iodiagnz, &lhsPs);
    /* PETSc copies the preallocation arrays inside MatCreateAIJ; release ours. */
    free(idiagnz);
    free(iodiagnz);

    ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);

    /* Jed Brown's improvement (currently disabled via the HIDE prefix). */
#ifdef HIDEJEDBROWN
    ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
#endif
    ierr = MatSetUp(lhsPs);

    PetscInt myMatStart, myMatEnd;
    ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd);
    if(workfc.myrank == rankdump)
      printf("Sclr myrank,myMatStart,myMatEnd %d,%lld,%lld, \n",
             workfc.myrank,(long long)myMatStart,(long long)myMatEnd);
  }
  if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs);

  get_time(duration, (duration+1));

  /* Element-level assembly for the scalar equation. */
  ElmGMRPETScSclr(y, ac, x, elDw,
                  shp, shgl, iBC,
                  BC, shpb,
                  shglb, res,
                  rmes, iper,
                  ilwork, &lhsPs);
  if(firstpetsccalls == 1) {
    /*
     * Setup IndexSet mimicking the vector insertion procedure.
     * TODO: better arrangement?
     */
    PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg);
    PetscInt nodetoinsert;

    nodetoinsert = 0;
    k=0;

    /* debug */
    if(workfc.myrank == rankdump) {
      printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
      printf("myrank,mbeg,mend %d %lld %lld \n",workfc.myrank,(long long)mbeg,(long long)mend);
    }

    if(workfc.numpe > 1) {
      for (i=0; i<nshg; i++) {
        nodetoinsert = fncorp[i]-1; /* global node, 0-based */
        /* debug */
        if(workfc.myrank == rankdump) {
          printf("myrank,i,nodetoinsert %d %d %lld \n",workfc.myrank,i,(long long)nodetoinsert);
        }
        indexsetarys[k] = nodetoinsert;
        k = k+1;
      }
    }
    else {
      for (i=0; i<nshg; i++) {
        nodetoinsert = i;
        indexsetarys[k] = nodetoinsert;
        k = k+1;
      }
    }

    /* Create Vector Index Maps */
    petsc_n = (PetscInt) nshg;
    ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys,
                           PETSC_COPY_VALUES, &LocalIndexSets);
    free(indexsetarys);
  }
  if(genpar.lhs == 1) {
    ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY);
    ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY);
  }
  if(firstpetsccalls == 1) {
    ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol);
    ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs);
    ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs);
  }
  ierr = VecZeroEntries(resPs);
  if(firstpetsccalls == 1) {
    ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals);
  }

  /* Accumulate the local residual into the global PETSc vector. */
  PetscRow=0;
  k = 0;
  for (i=0; i<nshg; i++ ){
    valtoinsert = res[i];
    if(workfc.numpe > 1) {
      PetscRow = (fncorp[i]-1);
    }
    else {
      PetscRow = i;
    }
    assert(fncorp[i]<=nshgt);
    assert(fncorp[i]>0);
    assert(PetscRow>=0);
    assert(PetscRow<=nshgt);
    ierr = VecSetValue(resPs, PetscRow, valtoinsert, ADD_VALUES);
  }
  ierr = VecAssemblyBegin(resPs);
  ierr = VecAssemblyEnd(resPs);
  ierr = VecNorm(resPs,NORM_2,&resNrm);

  if(firstpetsccalls == 1) {
    /* One-time KSP setup for the scalar solve. */
    ierr = KSPCreate(PETSC_COMM_WORLD, &ksps);
    ierr = KSPSetOperators(ksps, lhsPs, lhsPs);
    ierr = KSPGetPC(ksps, &pcs);
    ierr = PCSetType(pcs, PCPBJACOBI);
    PetscInt maxits;
    maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace;
    ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
    ierr = KSPSetFromOptions(ksps);
  }
  ierr = KSPSolve(ksps, resPs, DyPs);
  if(firstpetsccalls == 1) {
    ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, NULL, &scatter7s);
  }
  /* Gather the distributed solution back into local ordering and copy out. */
  ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
  ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
  ierr = VecGetArray(DyPLocals, &parray);
  PetscRow = 0;
  for ( node = 0; node< nshg; node++) {
    Dy[node] = parray[PetscRow];
    PetscRow = PetscRow+1;
  }
  ierr = VecRestoreArray(DyPLocals, &parray);

  firstpetsccalls = 0;

  /* .... output the statistics */
  /* itrpar.iKss=0; // see rstat() */
  PetscInt its;
  ierr = KSPGetIterationNumber(ksps, &its);
  itrpar.iKss = (int) its;
  itrpar.ntotGMs += (int) its;
  rstatpSclr (&resNrm);
  /* .... end */
}
#endif