1 #ifdef HAVE_PETSC 2 #include <stdio.h> 3 4 #include "petscsys.h" 5 #include "petscvec.h" 6 #include "petscmat.h" 7 #include "petscpc.h" 8 #include "petscksp.h" 9 10 #include "assert.h" 11 #include "common_c.h" 12 13 #include <FCMangle.h> 14 #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP) 15 #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR) 16 #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC) 17 #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR) 18 #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT) 19 #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR) 20 #define min(a,b) (((a) < (b)) ? (a) : (b)) 21 #define max(a,b) (((a) > (b)) ? (a) : (b)) 22 #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME) 23 #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF) 24 25 typedef long long int gcorp_t; 26 27 void get_time(uint64_t* rv, uint64_t* cycle); 28 void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl); 29 30 //#include "auxmpi.h" 31 // static PetscOffset poff; 32 33 static Mat lhsP; 34 static PC pc; 35 static KSP ksp; 36 static Vec DyP, resP, DyPLocal; 37 static PetscErrorCode ierr; 38 static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol; 39 static IS LocalIndexSet; 40 static ISLocalToGlobalMapping VectorMapping; 41 static VecScatter scatter7; 42 static int firstpetsccall = 1; 43 44 static Mat lhsPs; 45 static PC pcs; 46 static KSP ksps; 47 static Vec DyPs, resPs, DyPLocals; 48 static IS LocalIndexSets; 49 static ISLocalToGlobalMapping VectorMappings; 50 static VecScatter scatter7s; 51 static int firstpetsccalls = 1; 52 53 static int rankdump=-1; // 8121 was the problem rank with 3.5.3 54 55 56 void SolGMRp(double* y, double* ac, double* yold, 57 double* x, int* iBC, double* BC, 58 int* col, int* row, double* lhsk, 59 double* res, double* BDiag, 60 int* iper, 61 int* ilwork, double* shp, double* shgl, double* shpb, 62 double* shglb, double* Dy, double* rerr, long long int* fncorp) 63 { 64 // 65 // ---------------------------------------------------------------------- 66 // 67 // This is the preconditioned GMRES driver routine. 68 // 69 // input: 70 // y (nshg,ndof) : Y-variables at n+alpha_v 71 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 72 // yold (nshg,ndof) : Y-variables at beginning of step 73 // acold (nshg,ndof) : Primvar. accel. variable at begng step 74 // x (numnp,nsd) : node coordinates 75 // iBC (nshg) : BC codes 76 // BC (nshg,ndofBC) : BC constraint parameters 77 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 78 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 79 // 80 // output: 81 // res (nshg,nflow) : preconditioned residual 82 // BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 83 // 84 // 85 // Zdenek Johan, Winter 1991. (Fortran 90) 86 // ---------------------------------------------------------------------- 87 // 88 89 90 // Get variables from common_c.h 91 int nshg, nflow, nsd, iownnodes; 92 nshg = conpar.nshg; 93 nflow = conpar.nflow; 94 nsd = NSD; 95 iownnodes = conpar.iownnodes; 96 97 gcorp_t nshgt; 98 gcorp_t mbeg; 99 gcorp_t mend; 100 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 101 mbeg = (gcorp_t) newdim.minowned; 102 mend = (gcorp_t) newdim.maxowned; 103 104 105 int node, element, var, eqn; 106 double valtoinsert; 107 int nenl, iel, lelCat, lcsyst, iorder, nshl; 108 int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 109 double DyFlat[nshg*nflow]; 110 double DyFlatPhasta[nshg*nflow]; 111 double rmes[nshg*nflow]; 112 // DEBUG 113 int i,j,k,l,m; 114 115 // FIXME: PetscScalar 116 double real_rtol, real_abstol, real_dtol; 117 // /DEBUG 118 //double parray[1]; // Should be a PetscScalar 119 double *parray; // Should be a PetscScalar 120 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 121 PetscInt petsc_n; 122 PetscOne = 1; 123 uint64_t duration[4]; 124 125 gcorp_t glbNZ; 126 127 if(firstpetsccall == 1) { 128 //Everthing in this conditional block should be moved to a function to estimate the size of PETSc's matrix which improves time on the first matrix fill 129 // 130 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 131 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 132 for(i=0;i<iownnodes;i++) { 133 idiagnz[i]=0; 134 iodiagnz[i]=0; 135 } 136 i=0; 137 for(k=0;k<nshg;k++) { 138 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 139 // this node is not owned by this rank so we skip 140 } else { 141 for(j=col[i]-1;j<col[i+1]-1;j++) { 142 // assert(row[j]<=nshg); 143 // assert(fncorp[row[j]-1]<=nshgt); 144 glbNZ=fncorp[row[j]-1]; 145 if((glbNZ < mbeg) || (glbNZ > mend)) { 146 iodiagnz[i]++; 147 } else { 148 idiagnz[i]++; 149 } 150 } 151 i++; 152 } 153 } 154 gcorp_t mind=1000; 155 gcorp_t mino=1000; 156 gcorp_t maxd=0; 157 gcorp_t maxo=0; 158 for(i=0;i<iownnodes;i++) { 159 mind=min(mind,idiagnz[i]); 160 mino=min(mino,iodiagnz[i]); 161 maxd=max(maxd,idiagnz[i]); 162 maxo=max(maxo,iodiagnz[i]); 163 // iodiagnz[i]=max(iodiagnz[i],10); 164 // idiagnz[i]=max(idiagnz[i],10); 165 // iodiagnz[i]=2*iodiagnz[i]; // estimate a bit higher for off-part interactions 166 // idiagnz[i]=2*idiagnz[i]; // estimate a bit higher for off-part interactions 167 } 168 // the above was pretty good but below is faster and not too much more memory...of course once you do this 169 // could just use the constant fill parameters in create but keep it alive for potential later optimization 170 171 for(i=0;i<iownnodes;i++) { 172 iodiagnz[i]=1.3*maxd; 173 idiagnz[i]=1.3*maxd; 174 } 175 176 177 178 if(workfc.numpe < 200){ 179 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 180 printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo); 181 } 182 // Print debug info 183 if(nshgt < 200){ 184 int irank; 185 for(irank=0;irank<workfc.numpe;irank++) { 186 if(irank == workfc.myrank){ 187 printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank); 188 for(i=0;i<iownnodes;i++) { 189 printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]); 190 } 191 } 192 MPI_Barrier(MPI_COMM_WORLD); 193 } 194 } 195 petsc_bs = (PetscInt) nflow; 196 petsc_m = (PetscInt) nflow* (PetscInt) iownnodes; 197 petsc_M = (PetscInt) nshgt * (PetscInt) nflow; 198 petsc_PA = (PetscInt) 40; 199 ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 200 0, idiagnz, 0, iodiagnz, &lhsP); 201 202 ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 203 204 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 205 #ifdef JEDBROWN 206 ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 207 #endif 208 ierr = MatSetUp(lhsP); 209 210 PetscInt myMatStart, myMatEnd; 211 ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 212 //debug 213 if(workfc.myrank == rankdump) printf("Flow myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 214 } 215 if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP); 216 217 // 218 // .... *******************>> Element Data Formation <<****************** 219 // 220 // 221 // .... set the parameters for flux and surface tension calculations 222 // 223 // 224 genpar.idflx = 0 ; 225 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 226 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 227 228 get_time(duration, (duration+1)); 229 230 ElmGMRPETSc(y, ac, x, 231 shp, shgl, iBC, 232 BC, shpb, 233 shglb, res, 234 rmes, iper, 235 ilwork, rerr , &lhsP); 236 237 get_time((duration+2), (duration+3)); 238 get_max_time_diff((duration), (duration+2), 239 (duration+1), (duration+3), 240 "ElmGMRPETSc \0"); // char(0)) 241 if(firstpetsccall == 1) { 242 // Setup IndexSet. For now, we mimic vector insertion procedure 243 // Since we always reference by global indexes this doesn't matter 244 // except for cache performance) 245 // TODO: Better arrangment? 246 PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg); 247 PetscInt nodetoinsert; 248 nodetoinsert = 0; 249 k=0; 250 //debug 251 if(workfc.myrank == rankdump) { 252 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 253 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 254 } 255 if(workfc.numpe > 1) { 256 for (i=0; i<nshg ; i++) { 257 nodetoinsert = fncorp[i]-1; 258 //debug 259 if(workfc.myrank == rankdump) { 260 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 261 } 262 263 // assert(fncorp[i]>=0); 264 for (j=1; j<=nflow; j++) { 265 indexsetary[k] = nodetoinsert*nflow+(j-1); 266 assert(indexsetary[k]>=0); 267 // assert(fncorp[i]>=0); 268 k = k+1; 269 } 270 } 271 } 272 else { 273 for (i=0; i<nshg ; i++) { 274 nodetoinsert = i; 275 for (j=1; j<=nflow; j++) { 276 indexsetary[k] = nodetoinsert*nflow+(j-1); 277 k = k+1; 278 } 279 } 280 } 281 282 // Create Vector Index Maps 283 petsc_n = (PetscInt) nshg * (PetscInt) nflow; 284 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary, 285 PETSC_COPY_VALUES, &LocalIndexSet); 286 free(indexsetary); 287 } 288 if(genpar.lhs == 1) { 289 get_time((duration), (duration+1)); 290 ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY); 291 ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY); 292 get_time((duration+2),(duration+3)); 293 get_max_time_diff((duration), (duration+2), 294 (duration+1), (duration+3), 295 "MatAssembly \0"); // char(0)) 296 get_time(duration, (duration+1)); 297 } 298 if(firstpetsccall == 1) { 299 ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol); 300 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP); 301 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP); 302 } 303 ierr = VecZeroEntries(resP); 304 ierr = VecZeroEntries(DyP); 305 if(firstpetsccall == 1) { 306 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal); 307 } 308 309 PetscRow=0; 310 k = 0; 311 int index; 312 for (i=0; i<nshg; i++ ){ 313 for (j = 1; j<=nflow; j++){ 314 index = i + (j-1)*nshg; 315 valtoinsert = res[index]; 316 if(workfc.numpe > 1) { 317 PetscRow = (fncorp[i]-1)*nflow+(j-1); 318 } 319 else { 320 PetscRow = i*nflow+(j-1); 321 } 322 assert(fncorp[i]<=nshgt); 323 assert(fncorp[i]>0); 324 assert(PetscRow>=0); 325 assert(PetscRow<=nshgt*nflow); 326 ierr = VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES); 327 } 328 } 329 ierr = VecAssemblyBegin(resP); 330 ierr = VecAssemblyEnd(resP); 331 get_time((duration+2), (duration+3)); 332 get_max_time_diff((duration), (duration+2), 333 (duration+1), (duration+3), 334 "VectorWorkPre \0"); // char(0)) 335 336 get_time((duration),(duration+1)); 337 338 if(firstpetsccall == 1) { 339 ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); 340 ierr = KSPSetOperators(ksp, lhsP, lhsP); 341 ierr = KSPGetPC(ksp, &pc); 342 ierr = PCSetType(pc, PCPBJACOBI); 343 PetscInt maxits; 344 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 345 ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 346 ierr = KSPSetFromOptions(ksp); 347 } 348 ierr = KSPSolve(ksp, resP, DyP); 349 get_time((duration+2),(duration+3)); 350 get_max_time_diff((duration), (duration+2), 351 (duration+1), (duration+3), 352 "KSPSolve \0"); // char(0)) 353 get_time((duration),(duration+1)); 354 if(firstpetsccall == 1) { 355 ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, NULL, &scatter7); 356 } 357 ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 358 ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 359 ierr = VecGetArray(DyPLocal, &parray); 360 PetscRow = 0; 361 for ( node = 0; node< nshg; node++) { 362 for (var = 1; var<= nflow; var++) { 363 index = node + (var-1)*nshg; 364 Dy[index] = parray[PetscRow]; 365 PetscRow = PetscRow+1; 366 } 367 } 368 ierr = VecRestoreArray(DyPLocal, &parray); 369 370 firstpetsccall = 0; 371 372 // .... output the statistics 373 // 374 itrpar.iKs=0; // see rstat() 375 PetscInt its; 376 ierr = KSPGetIterationNumber(ksp, &its); 377 itrpar.iKs = (int) its; 378 get_time((duration+2),(duration+3)); 379 get_max_time_diff((duration), (duration+2), 380 (duration+1), (duration+3), 381 "solWork \0"); // char(0)) 382 itrpar.ntotGM += (int) its; 383 rstat (res, ilwork); 384 // 385 // .... end 386 // 387 } 388 389 void SolGMRpSclr(double* y, double* ac, 390 double* x, double* elDw, int* iBC, double* BC, 391 int* col, int* row, 392 int* iper, 393 int* ilwork, double* shp, double* shgl, double* shpb, 394 double* shglb, double* res, double* Dy, long long int* fncorp) 395 { 396 // 397 // ---------------------------------------------------------------------- 398 // 399 // This is the preconditioned GMRES driver routine. 400 // 401 // input: 402 // y (nshg,ndof) : Y-variables at n+alpha_v 403 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 404 // yold (nshg,ndof) : Y-variables at beginning of step 405 // acold (nshg,ndof) : Primvar. accel. variable at begng step 406 // x (numnp,nsd) : node coordinates 407 // iBC (nshg) : BC codes 408 // BC (nshg,ndofBC) : BC constraint parameters 409 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 410 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 411 // 412 // ---------------------------------------------------------------------- 413 // 414 415 416 // Get variables from common_c.h 417 int nshg, nflow, nsd, iownnodes; 418 nshg = conpar.nshg; 419 nsd = NSD; 420 iownnodes = conpar.iownnodes; 421 422 gcorp_t nshgt; 423 gcorp_t mbeg; 424 gcorp_t mend; 425 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 426 mbeg = (gcorp_t) newdim.minowned; 427 mend = (gcorp_t) newdim.maxowned; 428 429 430 int node, element, var, eqn; 431 double valtoinsert; 432 int nenl, iel, lelCat, lcsyst, iorder, nshl; 433 int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 434 double DyFlats[nshg]; 435 double DyFlatPhastas[nshg]; 436 double rmes[nshg]; 437 // DEBUG 438 int i,j,k,l,m; 439 440 double real_rtol, real_abstol, real_dtol; 441 double *parray; 442 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 443 PetscInt petsc_n; 444 PetscOne = 1; 445 uint64_t duration[4]; 446 447 448 // 449 // 450 // .... *******************>> Element Data Formation <<****************** 451 // 452 // 453 // .... set the parameters for flux and surface tension calculations 454 // 455 // 456 genpar.idflx = 0 ; 457 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 458 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 459 460 461 462 gcorp_t glbNZ; 463 464 if(firstpetsccalls == 1) { 465 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 466 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 467 for(i=0;i<iownnodes;i++) { 468 idiagnz[i]=0; 469 iodiagnz[i]=0; 470 } 471 i=0; 472 for(k=0;k<nshg;k++) { 473 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 474 // this node is not owned by this rank so we skip 475 } else { 476 for(j=col[i]-1;j<col[i+1]-1;j++) { 477 glbNZ=fncorp[row[j]-1]; 478 if((glbNZ < mbeg) || (glbNZ > mend)) { 479 iodiagnz[i]++; 480 } else { 481 idiagnz[i]++; 482 } 483 } 484 i++; 485 } 486 } 487 gcorp_t mind=1000; 488 gcorp_t mino=1000; 489 gcorp_t maxd=0; 490 gcorp_t maxo=0; 491 for(i=0;i<iownnodes;i++) { 492 mind=min(mind,idiagnz[i]); 493 mino=min(mino,iodiagnz[i]); 494 maxd=max(maxd,idiagnz[i]); 495 maxo=max(maxo,iodiagnz[i]); 496 } 497 498 for(i=0;i<iownnodes;i++) { 499 iodiagnz[i]=1.3*maxd; 500 idiagnz[i]=1.3*maxd; 501 } 502 503 504 505 // } 506 // if(firstpetsccalls == 1) { 507 508 petsc_m = (PetscInt) iownnodes; 509 petsc_M = (PetscInt) nshgt; 510 petsc_PA = (PetscInt) 40; 511 512 ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M, 513 0, idiagnz, 0, iodiagnz, &lhsPs); 514 515 ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 516 517 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 518 #ifdef HIDEJEDBROWN 519 ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 520 #endif 521 ierr = MatSetUp(lhsPs); 522 523 PetscInt myMatStart, myMatEnd; 524 ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd); 525 if(workfc.myrank ==rankdump) printf("Sclr myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 526 } 527 // MPI_Barrier(MPI_COMM_WORLD); 528 // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 529 if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs); 530 531 get_time(duration, (duration+1)); 532 533 // MPI_Barrier(MPI_COMM_WORLD); 534 // if(workfc.myrank ==0) printf("Before elmgmr \n"); 535 // for (i=0; i<nshg ; i++) { 536 // assert(fncorp[i]>0); 537 // } 538 539 ElmGMRPETScSclr(y, ac, x, elDw, 540 shp, shgl, iBC, 541 BC, shpb, 542 shglb, res, 543 rmes, iper, 544 ilwork, &lhsPs); 545 if(firstpetsccalls == 1) { 546 // Setup IndexSet. For now, we mimic vector insertion procedure 547 // Since we always reference by global indexes this doesn't matter 548 // except for cache performance) 549 // TODO: Better arrangment? 550 551 PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg); 552 PetscInt nodetoinsert; 553 554 nodetoinsert = 0; 555 k=0; 556 557 //debug 558 if(workfc.myrank == rankdump) { 559 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 560 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 561 } 562 563 if(workfc.numpe > 1) { 564 for (i=0; i<nshg ; i++) { 565 nodetoinsert = fncorp[i]-1; 566 //debug 567 if(workfc.myrank == rankdump) { 568 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 569 } 570 571 indexsetarys[k] = nodetoinsert; 572 k = k+1; 573 } 574 } 575 else { 576 for (i=0; i<nshg ; i++) { 577 nodetoinsert = i; 578 indexsetarys[k] = nodetoinsert; 579 k = k+1; 580 } 581 } 582 583 // Create Vector Index Maps 584 petsc_n = (PetscInt) nshg; 585 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys, 586 PETSC_COPY_VALUES, &LocalIndexSets); 587 free(indexsetarys); 588 } 589 if(genpar.lhs ==1) { 590 ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY); 591 ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY); 592 } 593 if(firstpetsccalls == 1) { 594 ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol); 595 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs); 596 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs); 597 } 598 ierr = VecZeroEntries(resPs); 599 ierr = VecZeroEntries(DyPs); 600 if(firstpetsccalls == 1) { 601 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals); 602 } 603 604 PetscRow=0; 605 k = 0; 606 int index; 607 for (i=0; i<nshg; i++ ){ 608 valtoinsert = res[i]; 609 if(workfc.numpe > 1) { 610 PetscRow = (fncorp[i]-1); 611 } 612 else { 613 PetscRow = i-1; 614 } 615 assert(fncorp[i]<=nshgt); 616 assert(fncorp[i]>0); 617 assert(PetscRow>=0); 618 assert(PetscRow<=nshgt); 619 ierr = VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES); 620 } 621 // printf("after VecSetValue loop %d\n",ierr); 622 ierr = VecAssemblyBegin(resPs); 623 ierr = VecAssemblyEnd(resPs); 624 625 if(firstpetsccalls == 1) { 626 ierr = KSPCreate(PETSC_COMM_WORLD, &ksps); 627 ierr = KSPSetOperators(ksps, lhsPs, lhsPs); 628 //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN); 629 ierr = KSPGetPC(ksps, &pcs); 630 ierr = PCSetType(pcs, PCPBJACOBI); 631 PetscInt maxits; 632 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 633 //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 634 ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 635 ierr = KSPSetFromOptions(ksps); 636 } 637 ierr = KSPSolve(ksps, resPs, DyPs); 638 if(firstpetsccalls == 1) { 639 ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, NULL, &scatter7s); 640 } 641 ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 642 ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 643 ierr = VecGetArray(DyPLocals, &parray); 644 PetscRow = 0; 645 for ( node = 0; node< nshg; node++) { 646 index = node; 647 Dy[index] = parray[PetscRow]; 648 PetscRow = PetscRow+1; 649 } 650 ierr = VecRestoreArray(DyPLocals, &parray); 651 652 firstpetsccalls = 0; 653 654 // .... output the statistics 655 // 656 // itrpar.iKss=0; // see rstat() 657 PetscInt its; 658 ierr = KSPGetIterationNumber(ksps, &its); 659 itrpar.iKss = (int) its; 660 itrpar.ntotGMs += (int) its; 661 rstatSclr (res, ilwork); 662 // 663 // .... end 664 // 665 } 666 #endif 667