1 #ifdef HAVE_PETSC 2 #include <stdio.h> 3 4 #include "petscsys.h" 5 #include "petscvec.h" 6 #include "petscmat.h" 7 #include "petscpc.h" 8 #include "petscksp.h" 9 10 #include "assert.h" 11 #include "common_c.h" 12 13 #include <FCMangle.h> 14 #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP) 15 #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR) 16 #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC) 17 #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR) 18 #define rstatp FortranCInterface_GLOBAL_(rstatp, RSTATP) 19 #define rstatpSclr FortranCInterface_GLOBAL_(rstatpsclr, RSTATPSCLR) 20 #define min(a,b) (((a) < (b)) ? (a) : (b)) 21 #define max(a,b) (((a) > (b)) ? (a) : (b)) 22 #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME) 23 #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF) 24 25 typedef long long int gcorp_t; 26 27 void get_time(uint64_t* rv, uint64_t* cycle); 28 void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl); 29 30 //#include "auxmpi.h" 31 // static PetscOffset poff; 32 33 static Mat lhsP; 34 static PC pc; 35 static KSP ksp; 36 static Vec DyP, resP, DyPLocal; 37 static PetscErrorCode ierr; 38 static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol; 39 static IS LocalIndexSet; 40 static ISLocalToGlobalMapping VectorMapping; 41 static VecScatter scatter7; 42 static int firstpetsccall = 1; 43 44 static Mat lhsPs; 45 static PC pcs; 46 static KSP ksps; 47 static Vec DyPs, resPs, DyPLocals; 48 static IS LocalIndexSets; 49 static ISLocalToGlobalMapping VectorMappings; 50 static VecScatter scatter7s; 51 static int firstpetsccalls = 1; 52 53 static int rankdump=-1; // 8121 was the problem rank with 3.5.3 54 PetscReal resNrm; 55 56 57 void SolGMRp(double* y, double* ac, double* yold, 58 double* x, int* iBC, double* BC, 59 int* col, int* row, double* lhsk, 60 double* res, double* BDiag, 61 int* iper, 62 int* ilwork, double* shp, double* shgl, double* shpb, 63 double* shglb, double* Dy, double* rerr, long long int* fncorp) 64 { 65 // 66 // ---------------------------------------------------------------------- 67 // 68 // This is the preconditioned GMRES driver routine. 69 // 70 // input: 71 // y (nshg,ndof) : Y-variables at n+alpha_v 72 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 73 // yold (nshg,ndof) : Y-variables at beginning of step 74 // acold (nshg,ndof) : Primvar. accel. variable at begng step 75 // x (numnp,nsd) : node coordinates 76 // iBC (nshg) : BC codes 77 // BC (nshg,ndofBC) : BC constraint parameters 78 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 79 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 80 // 81 // output: 82 // res (nshg,nflow) : preconditioned residual 83 // BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 84 // 85 // 86 // Zdenek Johan, Winter 1991. (Fortran 90) 87 // ---------------------------------------------------------------------- 88 // 89 90 91 // Get variables from common_c.h 92 int nshg, nflow, nsd, iownnodes; 93 nshg = conpar.nshg; 94 nflow = conpar.nflow; 95 nsd = NSD; 96 iownnodes = conpar.iownnodes; 97 98 gcorp_t nshgt; 99 gcorp_t mbeg; 100 gcorp_t mend; 101 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 102 mbeg = (gcorp_t) newdim.minowned; 103 mend = (gcorp_t) newdim.maxowned; 104 105 106 int node, element, var, eqn; 107 double valtoinsert; 108 int nenl, iel, lelCat, lcsyst, iorder, nshl; 109 int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 110 double DyFlat[nshg*nflow]; 111 double DyFlatPhasta[nshg*nflow]; 112 double rmes[nshg*nflow]; 113 // DEBUG 114 int i,j,k,l,m; 115 116 // FIXME: PetscScalar 117 double real_rtol, real_abstol, real_dtol; 118 // /DEBUG 119 //double parray[1]; // Should be a PetscScalar 120 double *parray; // Should be a PetscScalar 121 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 122 PetscInt petsc_n; 123 PetscOne = 1; 124 uint64_t duration[4]; 125 126 gcorp_t glbNZ; 127 128 if(firstpetsccall == 1) { 129 //Everthing in this conditional block should be moved to a function to estimate the size of PETSc's matrix which improves time on the first matrix fill 130 // 131 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 132 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 133 for(i=0;i<iownnodes;i++) { 134 idiagnz[i]=0; 135 iodiagnz[i]=0; 136 } 137 i=0; 138 for(k=0;k<nshg;k++) { 139 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 140 // this node is not owned by this rank so we skip 141 } else { 142 for(j=col[i]-1;j<col[i+1]-1;j++) { 143 // assert(row[j]<=nshg); 144 // assert(fncorp[row[j]-1]<=nshgt); 145 glbNZ=fncorp[row[j]-1]; 146 if((glbNZ < mbeg) || (glbNZ > mend)) { 147 iodiagnz[i]++; 148 } else { 149 idiagnz[i]++; 150 } 151 } 152 i++; 153 } 154 } 155 gcorp_t mind=1000; 156 gcorp_t mino=1000; 157 gcorp_t maxd=0; 158 gcorp_t maxo=0; 159 for(i=0;i<iownnodes;i++) { 160 mind=min(mind,idiagnz[i]); 161 mino=min(mino,iodiagnz[i]); 162 maxd=max(maxd,idiagnz[i]); 163 maxo=max(maxo,iodiagnz[i]); 164 // iodiagnz[i]=max(iodiagnz[i],10); 165 // idiagnz[i]=max(idiagnz[i],10); 166 // iodiagnz[i]=2*iodiagnz[i]; // estimate a bit higher for off-part interactions 167 // idiagnz[i]=2*idiagnz[i]; // estimate a bit higher for off-part interactions 168 } 169 // the above was pretty good but below is faster and not too much more memory...of course once you do this 170 // could just use the constant fill parameters in create but keep it alive for potential later optimization 171 172 for(i=0;i<iownnodes;i++) { 173 iodiagnz[i]=1.3*maxd; 174 idiagnz[i]=1.3*maxd; 175 } 176 177 178 179 if(workfc.numpe < 200){ 180 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 181 printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo); 182 } 183 // Print debug info 184 if(nshgt < 200){ 185 int irank; 186 for(irank=0;irank<workfc.numpe;irank++) { 187 if(irank == workfc.myrank){ 188 printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank); 189 for(i=0;i<iownnodes;i++) { 190 printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]); 191 } 192 } 193 MPI_Barrier(MPI_COMM_WORLD); 194 } 195 } 196 petsc_bs = (PetscInt) nflow; 197 petsc_m = (PetscInt) nflow* (PetscInt) iownnodes; 198 petsc_M = (PetscInt) nshgt * (PetscInt) nflow; 199 petsc_PA = (PetscInt) 40; 200 ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 201 0, idiagnz, 0, iodiagnz, &lhsP); 202 203 ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 204 205 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 206 #ifdef JEDBROWN 207 ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 208 #endif 209 ierr = MatSetUp(lhsP); 210 211 PetscInt myMatStart, myMatEnd; 212 ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 213 //debug 214 if(workfc.myrank == rankdump) printf("Flow myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 215 } 216 if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP); 217 218 // 219 // .... *******************>> Element Data Formation <<****************** 220 // 221 // 222 // .... set the parameters for flux and surface tension calculations 223 // 224 // 225 genpar.idflx = 0 ; 226 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 227 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 228 229 get_time(duration, (duration+1)); 230 231 ElmGMRPETSc(y, ac, x, 232 shp, shgl, iBC, 233 BC, shpb, 234 shglb, res, 235 rmes, iper, 236 ilwork, rerr , &lhsP); 237 238 get_time((duration+2), (duration+3)); 239 get_max_time_diff((duration), (duration+2), 240 (duration+1), (duration+3), 241 "ElmGMRPETSc \0"); // char(0)) 242 if(firstpetsccall == 1) { 243 // Setup IndexSet. For now, we mimic vector insertion procedure 244 // Since we always reference by global indexes this doesn't matter 245 // except for cache performance) 246 // TODO: Better arrangment? 247 PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg); 248 PetscInt nodetoinsert; 249 nodetoinsert = 0; 250 k=0; 251 //debug 252 if(workfc.myrank == rankdump) { 253 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 254 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 255 } 256 if(workfc.numpe > 1) { 257 for (i=0; i<nshg ; i++) { 258 nodetoinsert = fncorp[i]-1; 259 //debug 260 if(workfc.myrank == rankdump) { 261 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 262 } 263 264 // assert(fncorp[i]>=0); 265 for (j=1; j<=nflow; j++) { 266 indexsetary[k] = nodetoinsert*nflow+(j-1); 267 assert(indexsetary[k]>=0); 268 // assert(fncorp[i]>=0); 269 k = k+1; 270 } 271 } 272 } 273 else { 274 for (i=0; i<nshg ; i++) { 275 nodetoinsert = i; 276 for (j=1; j<=nflow; j++) { 277 indexsetary[k] = nodetoinsert*nflow+(j-1); 278 k = k+1; 279 } 280 } 281 } 282 283 // Create Vector Index Maps 284 petsc_n = (PetscInt) nshg * (PetscInt) nflow; 285 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary, 286 PETSC_COPY_VALUES, &LocalIndexSet); 287 free(indexsetary); 288 } 289 if(genpar.lhs == 1) { 290 get_time((duration), (duration+1)); 291 ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY); 292 ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY); 293 get_time((duration+2),(duration+3)); 294 get_max_time_diff((duration), (duration+2), 295 (duration+1), (duration+3), 296 "MatAssembly \0"); // char(0)) 297 get_time(duration, (duration+1)); 298 } 299 if(firstpetsccall == 1) { 300 ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol); 301 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP); 302 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP); 303 } 304 ierr = VecZeroEntries(resP); 305 ierr = VecZeroEntries(DyP); 306 if(firstpetsccall == 1) { 307 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal); 308 } 309 310 PetscRow=0; 311 k = 0; 312 int index; 313 for (i=0; i<nshg; i++ ){ 314 for (j = 1; j<=nflow; j++){ 315 index = i + (j-1)*nshg; 316 valtoinsert = res[index]; 317 if(workfc.numpe > 1) { 318 PetscRow = (fncorp[i]-1)*nflow+(j-1); 319 } 320 else { 321 PetscRow = i*nflow+(j-1); 322 } 323 assert(fncorp[i]<=nshgt); 324 assert(fncorp[i]>0); 325 assert(PetscRow>=0); 326 assert(PetscRow<=nshgt*nflow); 327 ierr = VecSetValue(resP, PetscRow, valtoinsert, ADD_VALUES); 328 } 329 } 330 ierr = VecAssemblyBegin(resP); 331 ierr = VecAssemblyEnd(resP); 332 ierr = VecNorm(resP,NORM_2,&resNrm); 333 get_time((duration+2), (duration+3)); 334 get_max_time_diff((duration), (duration+2), 335 (duration+1), (duration+3), 336 "VectorWorkPre \0"); // char(0)) 337 338 get_time((duration),(duration+1)); 339 340 if(firstpetsccall == 1) { 341 ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); 342 ierr = KSPSetOperators(ksp, lhsP, lhsP); 343 ierr = KSPGetPC(ksp, &pc); 344 ierr = PCSetType(pc, PCPBJACOBI); 345 PetscInt maxits; 346 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 347 ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 348 ierr = KSPSetFromOptions(ksp); 349 } 350 ierr = KSPSolve(ksp, resP, DyP); 351 get_time((duration+2),(duration+3)); 352 get_max_time_diff((duration), (duration+2), 353 (duration+1), (duration+3), 354 "KSPSolve \0"); // char(0)) 355 get_time((duration),(duration+1)); 356 if(firstpetsccall == 1) { 357 ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, NULL, &scatter7); 358 } 359 ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 360 ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 361 ierr = VecGetArray(DyPLocal, &parray); 362 PetscRow = 0; 363 for ( node = 0; node< nshg; node++) { 364 for (var = 1; var<= nflow; var++) { 365 index = node + (var-1)*nshg; 366 Dy[index] = parray[PetscRow]; 367 PetscRow = PetscRow+1; 368 } 369 } 370 ierr = VecRestoreArray(DyPLocal, &parray); 371 372 firstpetsccall = 0; 373 374 // .... output the statistics 375 // 376 itrpar.iKs=0; // see rstat() 377 PetscInt its; 378 ierr = KSPGetIterationNumber(ksp, &its); 379 itrpar.iKs = (int) its; 380 get_time((duration+2),(duration+3)); 381 get_max_time_diff((duration), (duration+2), 382 (duration+1), (duration+3), 383 "solWork \0"); // char(0)) 384 itrpar.ntotGM += (int) its; 385 rstatp (&resNrm); 386 // 387 // .... end 388 // 389 } 390 391 void SolGMRpSclr(double* y, double* ac, 392 double* x, double* elDw, int* iBC, double* BC, 393 int* col, int* row, 394 int* iper, 395 int* ilwork, double* shp, double* shgl, double* shpb, 396 double* shglb, double* res, double* Dy, long long int* fncorp) 397 { 398 // 399 // ---------------------------------------------------------------------- 400 // 401 // This is the preconditioned GMRES driver routine. 402 // 403 // input: 404 // y (nshg,ndof) : Y-variables at n+alpha_v 405 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 406 // yold (nshg,ndof) : Y-variables at beginning of step 407 // acold (nshg,ndof) : Primvar. accel. variable at begng step 408 // x (numnp,nsd) : node coordinates 409 // iBC (nshg) : BC codes 410 // BC (nshg,ndofBC) : BC constraint parameters 411 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 412 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 413 // 414 // ---------------------------------------------------------------------- 415 // 416 417 418 // Get variables from common_c.h 419 int nshg, nflow, nsd, iownnodes; 420 nshg = conpar.nshg; 421 nsd = NSD; 422 iownnodes = conpar.iownnodes; 423 424 gcorp_t nshgt; 425 gcorp_t mbeg; 426 gcorp_t mend; 427 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 428 mbeg = (gcorp_t) newdim.minowned; 429 mend = (gcorp_t) newdim.maxowned; 430 431 432 int node, element, var, eqn; 433 double valtoinsert; 434 int nenl, iel, lelCat, lcsyst, iorder, nshl; 435 int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 436 double DyFlats[nshg]; 437 double DyFlatPhastas[nshg]; 438 double rmes[nshg]; 439 // DEBUG 440 int i,j,k,l,m; 441 442 double real_rtol, real_abstol, real_dtol; 443 double *parray; 444 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 445 PetscInt petsc_n; 446 PetscOne = 1; 447 uint64_t duration[4]; 448 449 450 // 451 // 452 // .... *******************>> Element Data Formation <<****************** 453 // 454 // 455 // .... set the parameters for flux and surface tension calculations 456 // 457 // 458 genpar.idflx = 0 ; 459 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 460 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 461 462 463 464 gcorp_t glbNZ; 465 466 if(firstpetsccalls == 1) { 467 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 468 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 469 for(i=0;i<iownnodes;i++) { 470 idiagnz[i]=0; 471 iodiagnz[i]=0; 472 } 473 i=0; 474 for(k=0;k<nshg;k++) { 475 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 476 // this node is not owned by this rank so we skip 477 } else { 478 for(j=col[i]-1;j<col[i+1]-1;j++) { 479 glbNZ=fncorp[row[j]-1]; 480 if((glbNZ < mbeg) || (glbNZ > mend)) { 481 iodiagnz[i]++; 482 } else { 483 idiagnz[i]++; 484 } 485 } 486 i++; 487 } 488 } 489 gcorp_t mind=1000; 490 gcorp_t mino=1000; 491 gcorp_t maxd=0; 492 gcorp_t maxo=0; 493 for(i=0;i<iownnodes;i++) { 494 mind=min(mind,idiagnz[i]); 495 mino=min(mino,iodiagnz[i]); 496 maxd=max(maxd,idiagnz[i]); 497 maxo=max(maxo,iodiagnz[i]); 498 } 499 500 for(i=0;i<iownnodes;i++) { 501 iodiagnz[i]=1.3*maxd; 502 idiagnz[i]=1.3*maxd; 503 } 504 505 506 507 // } 508 // if(firstpetsccalls == 1) { 509 510 petsc_m = (PetscInt) iownnodes; 511 petsc_M = (PetscInt) nshgt; 512 petsc_PA = (PetscInt) 40; 513 514 ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M, 515 0, idiagnz, 0, iodiagnz, &lhsPs); 516 517 ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 518 519 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 520 #ifdef HIDEJEDBROWN 521 ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 522 #endif 523 ierr = MatSetUp(lhsPs); 524 525 PetscInt myMatStart, myMatEnd; 526 ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd); 527 if(workfc.myrank ==rankdump) printf("Sclr myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 528 } 529 // MPI_Barrier(MPI_COMM_WORLD); 530 // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 531 if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs); 532 533 get_time(duration, (duration+1)); 534 535 // MPI_Barrier(MPI_COMM_WORLD); 536 // if(workfc.myrank ==0) printf("Before elmgmr \n"); 537 // for (i=0; i<nshg ; i++) { 538 // assert(fncorp[i]>0); 539 // } 540 541 ElmGMRPETScSclr(y, ac, x, elDw, 542 shp, shgl, iBC, 543 BC, shpb, 544 shglb, res, 545 rmes, iper, 546 ilwork, &lhsPs); 547 if(firstpetsccalls == 1) { 548 // Setup IndexSet. For now, we mimic vector insertion procedure 549 // Since we always reference by global indexes this doesn't matter 550 // except for cache performance) 551 // TODO: Better arrangment? 552 553 PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg); 554 PetscInt nodetoinsert; 555 556 nodetoinsert = 0; 557 k=0; 558 559 //debug 560 if(workfc.myrank == rankdump) { 561 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 562 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 563 } 564 565 if(workfc.numpe > 1) { 566 for (i=0; i<nshg ; i++) { 567 nodetoinsert = fncorp[i]-1; 568 //debug 569 if(workfc.myrank == rankdump) { 570 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 571 } 572 573 indexsetarys[k] = nodetoinsert; 574 k = k+1; 575 } 576 } 577 else { 578 for (i=0; i<nshg ; i++) { 579 nodetoinsert = i; 580 indexsetarys[k] = nodetoinsert; 581 k = k+1; 582 } 583 } 584 585 // Create Vector Index Maps 586 petsc_n = (PetscInt) nshg; 587 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys, 588 PETSC_COPY_VALUES, &LocalIndexSets); 589 free(indexsetarys); 590 } 591 if(genpar.lhs ==1) { 592 ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY); 593 ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY); 594 } 595 if(firstpetsccalls == 1) { 596 ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol); 597 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs); 598 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs); 599 } 600 ierr = VecZeroEntries(resPs); 601 ierr = VecZeroEntries(DyPs); 602 if(firstpetsccalls == 1) { 603 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals); 604 } 605 606 PetscRow=0; 607 k = 0; 608 int index; 609 for (i=0; i<nshg; i++ ){ 610 valtoinsert = res[i]; 611 if(workfc.numpe > 1) { 612 PetscRow = (fncorp[i]-1); 613 } 614 else { 615 PetscRow = i-1; 616 } 617 assert(fncorp[i]<=nshgt); 618 assert(fncorp[i]>0); 619 assert(PetscRow>=0); 620 assert(PetscRow<=nshgt); 621 ierr = VecSetValue(resPs, PetscRow, valtoinsert, ADD_VALUES); 622 } 623 ierr = VecAssemblyBegin(resPs); 624 ierr = VecAssemblyEnd(resPs); 625 ierr = VecNorm(resPs,NORM_2,&resNrm); 626 627 if(firstpetsccalls == 1) { 628 ierr = KSPCreate(PETSC_COMM_WORLD, &ksps); 629 ierr = KSPSetOperators(ksps, lhsPs, lhsPs); 630 ierr = KSPGetPC(ksps, &pcs); 631 ierr = PCSetType(pcs, PCPBJACOBI); 632 PetscInt maxits; 633 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 634 ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 635 ierr = KSPSetFromOptions(ksps); 636 } 637 ierr = KSPSolve(ksps, resPs, DyPs); 638 if(firstpetsccalls == 1) { 639 ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, NULL, &scatter7s); 640 } 641 ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 642 ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 643 ierr = VecGetArray(DyPLocals, &parray); 644 PetscRow = 0; 645 for ( node = 0; node< nshg; node++) { 646 index = node; 647 Dy[index] = parray[PetscRow]; 648 PetscRow = PetscRow+1; 649 } 650 ierr = VecRestoreArray(DyPLocals, &parray); 651 652 firstpetsccalls = 0; 653 654 // .... output the statistics 655 // 656 // itrpar.iKss=0; // see rstat() 657 PetscInt its; 658 ierr = KSPGetIterationNumber(ksps, &its); 659 itrpar.iKss = (int) its; 660 itrpar.ntotGMs += (int) its; 661 rstatpSclr (&resNrm); 662 // 663 // .... end 664 // 665 } 666 #endif 667