#ifdef HAVE_PETSC
#include <stdio.h>
#include <stdlib.h>   /* malloc/free -- previously only reached transitively */
#include <stdint.h>   /* uint64_t -- needed by the get_time prototypes below */

#include "petscsys.h"
#include "petscvec.h"
#include "petscmat.h"
#include "petscpc.h"
#include "petscksp.h"

#include <assert.h>
#include "common_c.h"

#include <FCMangle.h>
#define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP)
#define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR)
#define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC)
#define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR)
#define rstatp FortranCInterface_GLOBAL_(rstatp, RSTATP)
#define rstatpSclr FortranCInterface_GLOBAL_(rstatpsclr, RSTATPSCLR)
#define min(a,b) (((a) < (b)) ? (a) : (b))
#define max(a,b) (((a) > (b)) ? (a) : (b))
#define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME)
#define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF)

/* Global node-number type: must match the Fortran-side 64-bit integers
   passed in through fncorp. */
typedef long long int gcorp_t;

void get_time(uint64_t* rv, uint64_t* cycle);
void get_max_time_diff(uint64_t* first, uint64_t* last,
                       uint64_t* c_first, uint64_t* c_last, char* lbl);

/* ---- Persistent PETSc objects for the flow (momentum/continuity) solve.
   Created on the first call to SolGMRp and reused on every later call. ---- */
static Mat lhsP;
static PC pc;
static KSP ksp;
static Vec DyP, resP, DyPLocal;
static PetscErrorCode ierr;      /* NOTE(review): return codes are stored but never checked */
static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol;
static IS LocalIndexSet;
static ISLocalToGlobalMapping VectorMapping;
static VecScatter scatter7;
static int firstpetsccall = 1;   /* one-shot guard for flow-solver setup */

/* ---- Same set of persistent objects for the scalar solve (SolGMRpSclr).
   NOTE(review): PetscRow/LocalRow/LocalCol above are shared between the two
   drivers; safe only because the calls are strictly sequential. ---- */
static Mat lhsPs;
static PC pcs;
static KSP ksps;
static Vec DyPs, resPs, DyPLocals;
static IS LocalIndexSets;
static ISLocalToGlobalMapping VectorMappings;
static VecScatter scatter7s;
static int firstpetsccalls = 1;  /* one-shot guard for scalar-solver setup */

static int rankdump = -1; /* 8121 was the problem rank with 3.5.3 */
PetscReal resNrm;         /* NOTE(review): could be static; left non-static in case another TU references it */

void SolGMRp(double* y, double* ac, double* yold,
             double* x, int* iBC, double* BC,
             int* col, int* row, double* lhsk,
             double* res,
             double* BDiag,
             int* iper,
             int* ilwork, double* shp, double* shgl, double* shpb,
             double* shglb, double* Dy, double* rerr, long long int* fncorp)
{
  //
  // ----------------------------------------------------------------------
  //
  //  This is the preconditioned GMRES driver routine (flow variables).
  //
  //  input:
  //   y      (nshg,ndof)            : Y-variables at n+alpha_v
  //   ac     (nshg,ndof)            : Primvar. accel. variable n+alpha_m
  //   yold   (nshg,ndof)            : Y-variables at beginning of step
  //   x      (numnp,nsd)            : node coordinates
  //   iBC    (nshg)                 : BC codes
  //   BC     (nshg,ndofBC)          : BC constraint parameters
  //   shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
  //   shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
  //   fncorp (nshg)                 : local-to-global node numbers (1-based)
  //
  //  output:
  //   res    (nshg,nflow)           : preconditioned residual
  //   BDiag  (nshg,nflow,nflow)     : block-diagonal preconditioner
  //   Dy     (nshg,nflow)           : solution increment
  //
  //  Zdenek Johan, Winter 1991. (Fortran 90)
  // ----------------------------------------------------------------------
  //

  // Problem sizes from the Fortran common blocks (common_c.h).
  int nshg, nflow, nsd, iownnodes;
  nshg = conpar.nshg;
  nflow = conpar.nflow;
  nsd = NSD;
  iownnodes = conpar.iownnodes;

  gcorp_t nshgt;  // global number of shape functions (total over all ranks)
  gcorp_t mbeg;   // first global node number owned by this rank (1-based)
  gcorp_t mend;   // last global node number owned by this rank (1-based)
  nshgt = (gcorp_t) newdim.nshgt; // Fix nshgt in common_c.h
  mbeg = (gcorp_t) newdim.minowned;
  mend = (gcorp_t) newdim.maxowned;

  int node, var;
  double valtoinsert;
  // NOTE(review): VLA sized by runtime nshg*nflow -- stack-overflow risk on
  // large partitions; consider heap allocation.  (The unused DyFlat /
  // DyFlatPhasta VLAs of the same size were removed.)
  double rmes[nshg*nflow];
  int i, j, k;

  double *parray; // Should be a PetscScalar
  PetscInt petsc_bs, petsc_m, petsc_M;
  PetscInt petsc_n;
  PetscOne = 1;
  uint64_t duration[4]; // {t_start, cyc_start, t_end, cyc_end} for get_max_time_diff

  gcorp_t glbNZ;

  if (firstpetsccall == 1) {
    // Everything in this conditional block should be moved to a function to
    // estimate the size of PETSc's matrix, which improves time on the first
    // matrix fill.
    PetscInt* idiagnz  = (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
    PetscInt* iodiagnz = (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
    for (i = 0; i < iownnodes; i++) {
      idiagnz[i] = 0;
      iodiagnz[i] = 0;
    }
    // Count, for each locally-owned node, how many of its sparse-matrix
    // column entries fall in the diagonal block (owned by this rank) vs the
    // off-diagonal block.
    i = 0;
    for (k = 0; k < nshg; k++) {
      if ((fncorp[k] < mbeg) || (fncorp[k] > mend)) {
        // this node is not owned by this rank so we skip
      } else {
        // NOTE(review): col is indexed by the compressed owned-node counter i
        // rather than by k; presumably owned nodes come first in the local
        // ordering so i == k here -- confirm against the mesh partitioner.
        for (j = col[i]-1; j < col[i+1]-1; j++) {
          glbNZ = fncorp[row[j]-1];
          if ((glbNZ < mbeg) || (glbNZ > mend)) {
            iodiagnz[i]++;
          } else {
            idiagnz[i]++;
          }
        }
        i++;
      }
    }
    gcorp_t mind = 1000;
    gcorp_t mino = 1000;
    gcorp_t maxd = 0;
    gcorp_t maxo = 0;
    for (i = 0; i < iownnodes; i++) {
      mind = min(mind, idiagnz[i]);
      mino = min(mino, iodiagnz[i]);
      maxd = max(maxd, idiagnz[i]);
      maxo = max(maxo, iodiagnz[i]);
    }
    // The exact per-row counts above were pretty good, but the constant
    // over-estimate below is faster and not too much more memory.  Of course,
    // once you do this you could just use the constant fill parameters in
    // create, but keep the per-row arrays alive for later optimization.
    for (i = 0; i < iownnodes; i++) {
      // NOTE(review): both estimates use maxd (not maxo) -- presumably an
      // intentional over-allocation; confirm.
      iodiagnz[i] = 1.3*maxd;
      idiagnz[i]  = 1.3*maxd;
    }

    if (workfc.numpe < 200) {
      printf("myrank,i,iownnodes,nshg %d %d %d %d \n",
             workfc.myrank, i, iownnodes, nshg);
      // gcorp_t is long long: %d here was undefined behavior.
      printf("myrank,mind,maxd,mino,maxo %d %lld %lld %lld %lld \n",
             workfc.myrank, mind, maxd, mino, maxo);
    }
    // Print debug info (rank-ordered via the barrier)
    if (nshgt < 200) {
      int irank;
      for (irank = 0; irank < workfc.numpe; irank++) {
        if (irank == workfc.myrank) {
          printf("mbeg,mend,myrank %lld %lld %d \n", mbeg, mend, workfc.myrank);
          for (i = 0; i < iownnodes; i++) {
            printf("%d %lld %lld \n", workfc.myrank,
                   (long long) idiagnz[i], (long long) iodiagnz[i]);
          }
        }
        MPI_Barrier(MPI_COMM_WORLD);
      }
    }
    petsc_bs = (PetscInt) nflow;
    petsc_m  = (PetscInt) nflow * (PetscInt) iownnodes;
    petsc_M  = (PetscInt) nshgt * (PetscInt) nflow;
    ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m,
                         petsc_M, petsc_M,
                         0, idiagnz, 0, iodiagnz, &lhsP);
    // PETSc copies the preallocation arrays inside MatCreateBAIJ; they were
    // previously leaked.
    free(idiagnz);
    free(iodiagnz);

    ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);

    // Next is Jed Brown's improvement to imprint Assembly to make that stage
    // scalable after the first call
#ifdef JEDBROWN
    ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
#endif
    ierr = MatSetUp(lhsP);

    PetscInt myMatStart, myMatEnd;
    ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
    // debug
    if (workfc.myrank == rankdump)
      printf("Flow myrank,myMatStart,myMatEnd %d,%lld,%lld, \n",
             workfc.myrank, (long long) myMatStart, (long long) myMatEnd);
  }
  if (genpar.lhs == 1) ierr = MatZeroEntries(lhsP);

  //
  // .... *******************>> Element Data Formation <<******************
  //
  // .... set the parameters for flux and surface tension calculations
  //
  genpar.idflx = 0;
  if (genpar.idiff >= 1) genpar.idflx = genpar.idflx + (nflow-1) * nsd;
  if (genpar.isurf == 1) genpar.idflx = genpar.idflx + nsd;

  get_time(duration, (duration+1));

  ElmGMRPETSc(y, ac, x,
              shp, shgl, iBC,
              BC, shpb,
              shglb, res,
              rmes, iper,
              ilwork, rerr, &lhsP);

  get_time((duration+2), (duration+3));
  get_max_time_diff((duration), (duration+2),
                    (duration+1), (duration+3),
                    "ElmGMRPETSc \0"); // char(0))
  if (firstpetsccall == 1) {
    // Setup IndexSet.  For now, we mimic the vector insertion procedure.
    // Since we always reference by global indexes this doesn't matter
    // (except for cache performance).
    // TODO: Better arrangement?
    PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg);
    PetscInt nodetoinsert;
    nodetoinsert = 0;
    k = 0;
    // debug
    if (workfc.myrank == rankdump) {
      printf("myrank,i,iownnodes,nshg %d %d %d %d \n",
             workfc.myrank, i, iownnodes, nshg);
      printf("myrank,mbeg,mend %d %lld %lld \n", workfc.myrank, mbeg, mend);
    }
    if (workfc.numpe > 1) {
      // Parallel: map each local node to its (0-based) global block row.
      for (i = 0; i < nshg; i++) {
        nodetoinsert = fncorp[i]-1;
        // debug
        if (workfc.myrank == rankdump) {
          printf("myrank,i,nodetoinsert %d %d %lld \n",
                 workfc.myrank, i, (long long) nodetoinsert);
        }
        for (j = 1; j <= nflow; j++) {
          indexsetary[k] = nodetoinsert*nflow + (j-1);
          assert(indexsetary[k] >= 0);
          k = k+1;
        }
      }
    }
    else {
      // Serial: local node number is the global node number.
      for (i = 0; i < nshg; i++) {
        nodetoinsert = i;
        for (j = 1; j <= nflow; j++) {
          indexsetary[k] = nodetoinsert*nflow + (j-1);
          k = k+1;
        }
      }
    }

    // Create Vector Index Maps
    petsc_n = (PetscInt) nshg * (PetscInt) nflow;
    ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary,
                           PETSC_COPY_VALUES, &LocalIndexSet);
    free(indexsetary);
  }
  if (genpar.lhs == 1) {
    get_time((duration), (duration+1));
    ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY);
    ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY);
    get_time((duration+2), (duration+3));
    get_max_time_diff((duration), (duration+2),
                      (duration+1), (duration+3),
                      "MatAssembly \0"); // char(0))
    get_time(duration, (duration+1));
  }
  if (firstpetsccall == 1) {
    ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol);
    ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP);
    ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP);
  }
  ierr = VecZeroEntries(resP);
  if (firstpetsccall == 1) {
    ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal);
  }

  // Scatter the (Fortran column-major) residual into the distributed
  // PETSc vector; shared interface nodes accumulate via ADD_VALUES.
  PetscRow = 0;
  int index;
  for (i = 0; i < nshg; i++) {
    for (j = 1; j <= nflow; j++) {
      index = i + (j-1)*nshg;
      valtoinsert = res[index];
      if (workfc.numpe > 1) {
        PetscRow = (fncorp[i]-1)*nflow + (j-1);
      }
      else {
        PetscRow = i*nflow + (j-1);
      }
      // NOTE(review): these asserts read fncorp even when numpe == 1;
      // presumably fncorp is valid in serial too -- confirm.
      assert(fncorp[i] <= nshgt);
      assert(fncorp[i] > 0);
      assert(PetscRow >= 0);
      assert(PetscRow <= nshgt*nflow);
      ierr = VecSetValue(resP, PetscRow, valtoinsert, ADD_VALUES);
    }
  }
  ierr = VecAssemblyBegin(resP);
  ierr = VecAssemblyEnd(resP);
  ierr = VecNorm(resP, NORM_2, &resNrm);
  get_time((duration+2), (duration+3));
  get_max_time_diff((duration), (duration+2),
                    (duration+1), (duration+3),
                    "VectorWorkPre \0"); // char(0))

  get_time((duration), (duration+1));

  if (firstpetsccall == 1) {
    ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
    ierr = KSPSetOperators(ksp, lhsP, lhsP);
    ierr = KSPGetPC(ksp, &pc);
    ierr = PCSetType(pc, PCPBJACOBI);
    PetscInt maxits;
    maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace;
    ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT,
                            maxits);
    ierr = KSPSetFromOptions(ksp);
  }
  ierr = KSPSolve(ksp, resP, DyP);
  get_time((duration+2), (duration+3));
  get_max_time_diff((duration), (duration+2),
                    (duration+1), (duration+3),
                    "KSPSolve \0"); // char(0))
  get_time((duration), (duration+1));
  if (firstpetsccall == 1) {
    ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, NULL, &scatter7);
  }
  ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
  ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
  // Copy the local solution back into Dy (Fortran column-major layout).
  ierr = VecGetArray(DyPLocal, &parray);
  PetscRow = 0;
  for (node = 0; node < nshg; node++) {
    for (var = 1; var <= nflow; var++) {
      index = node + (var-1)*nshg;
      Dy[index] = parray[PetscRow];
      PetscRow = PetscRow + 1;
    }
  }
  ierr = VecRestoreArray(DyPLocal, &parray);

  firstpetsccall = 0;

  // .... output the statistics
  //
  itrpar.iKs = 0; // see rstat()
  PetscInt its;
  ierr = KSPGetIterationNumber(ksp, &its);
  itrpar.iKs = (int) its;
  get_time((duration+2), (duration+3));
  get_max_time_diff((duration), (duration+2),
                    (duration+1), (duration+3),
                    "solWork \0"); // char(0))
  itrpar.ntotGM += (int) its;
  rstatp(&resNrm);
  //
  // .... end
  //
}

void SolGMRpSclr(double* y, double* ac,
                 double* x, double* elDw, int* iBC, double* BC,
                 int* col, int* row,
                 int* iper,
                 int* ilwork, double* shp, double* shgl, double* shpb,
                 double* shglb, double* res, double* Dy, long long int* fncorp)
{
  //
  // ----------------------------------------------------------------------
  //
  //  This is the preconditioned GMRES driver routine (scalar variable).
  //
  //  input:
  //   y      (nshg,ndof)            : Y-variables at n+alpha_v
  //   ac     (nshg,ndof)            : Primvar. accel. variable n+alpha_m
  //   x      (numnp,nsd)            : node coordinates
  //   iBC    (nshg)                 : BC codes
  //   BC     (nshg,ndofBC)          : BC constraint parameters
  //   shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
  //   shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
  //   fncorp (nshg)                 : local-to-global node numbers (1-based)
  //
  //  output:
  //   Dy     (nshg)                 : scalar solution increment
  //
  // ----------------------------------------------------------------------
  //

  // Problem sizes from the Fortran common blocks (common_c.h).
  int nshg, nflow, nsd, iownnodes;
  nshg = conpar.nshg;
  // BUG FIX: nflow was declared but never assigned here, yet it is used in
  // the genpar.idflx computation below (undefined behavior).
  nflow = conpar.nflow;
  nsd = NSD;
  iownnodes = conpar.iownnodes;

  gcorp_t nshgt;  // global number of shape functions (total over all ranks)
  gcorp_t mbeg;   // first global node number owned by this rank (1-based)
  gcorp_t mend;   // last global node number owned by this rank (1-based)
  nshgt = (gcorp_t) newdim.nshgt; // Fix nshgt in common_c.h
  mbeg = (gcorp_t) newdim.minowned;
  mend = (gcorp_t) newdim.maxowned;

  int node;
  double valtoinsert;
  // NOTE(review): runtime-sized VLA; the unused DyFlats/DyFlatPhastas
  // arrays of the same size were removed.
  double rmes[nshg];
  int i, j, k;

  double *parray; // Should be a PetscScalar
  PetscInt petsc_m, petsc_M;
  PetscInt petsc_n;
  PetscOne = 1;
  uint64_t duration[4]; // timing scratch for get_time

  //
  // .... *******************>> Element Data Formation <<******************
  //
  // .... set the parameters for flux and surface tension calculations
  //
  genpar.idflx = 0;
  if (genpar.idiff >= 1) genpar.idflx = genpar.idflx + (nflow-1) * nsd;
  if (genpar.isurf == 1) genpar.idflx = genpar.idflx + nsd;

  gcorp_t glbNZ;

  if (firstpetsccalls == 1) {
    // Estimate per-row nonzeros to preallocate the scalar AIJ matrix
    // (same scheme as the flow solver above, block size 1).
    PetscInt* idiagnz  = (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
    PetscInt* iodiagnz = (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
    for (i = 0; i < iownnodes; i++) {
      idiagnz[i] = 0;
      iodiagnz[i] = 0;
    }
    i = 0;
    for (k = 0; k < nshg; k++) {
      if ((fncorp[k] < mbeg) || (fncorp[k] > mend)) {
        // this node is not owned by this rank so we skip
      } else {
        // NOTE(review): col indexed by owned-node counter i, not k --
        // same assumption as in SolGMRp; confirm owned nodes come first.
        for (j = col[i]-1; j < col[i+1]-1; j++) {
          glbNZ = fncorp[row[j]-1];
          if ((glbNZ < mbeg) || (glbNZ > mend)) {
            iodiagnz[i]++;
          } else {
            idiagnz[i]++;
          }
        }
        i++;
      }
    }
    gcorp_t mind = 1000;
    gcorp_t mino = 1000;
    gcorp_t maxd = 0;
    gcorp_t maxo = 0;
    for (i = 0; i < iownnodes; i++) {
      mind = min(mind, idiagnz[i]);
      mino = min(mino, iodiagnz[i]);
      maxd = max(maxd, idiagnz[i]);
      maxo = max(maxo, iodiagnz[i]);
    }

    // Constant over-estimate (see the flow-solver comment above).
    for (i = 0; i < iownnodes; i++) {
      iodiagnz[i] = 1.3*maxd;
      idiagnz[i]  = 1.3*maxd;
    }

    petsc_m = (PetscInt) iownnodes;
    petsc_M = (PetscInt) nshgt;

    ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M,
                        0, idiagnz, 0, iodiagnz, &lhsPs);
    // PETSc copies the preallocation arrays inside MatCreateAIJ; they were
    // previously leaked.
    free(idiagnz);
    free(iodiagnz);

    ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);

    // Next is Jed Brown's improvement to imprint Assembly to make that stage
    // scalable after the first call (intentionally disabled: HIDEJEDBROWN).
#ifdef HIDEJEDBROWN
    ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
#endif
    ierr = MatSetUp(lhsPs);

    PetscInt myMatStart, myMatEnd;
    ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd);
    if (workfc.myrank == rankdump)
      printf("Sclr myrank,myMatStart,myMatEnd %d,%lld,%lld, \n",
             workfc.myrank, (long long) myMatStart, (long long) myMatEnd);
  }
  if (genpar.lhs == 1) ierr = MatZeroEntries(lhsPs);

  get_time(duration, (duration+1));

  ElmGMRPETScSclr(y, ac, x, elDw,
                  shp, shgl, iBC,
                  BC, shpb,
                  shglb, res,
                  rmes, iper,
                  ilwork, &lhsPs);
  if (firstpetsccalls == 1) {
    // Setup IndexSet.  For now, we mimic the vector insertion procedure.
    // Since we always reference by global indexes this doesn't matter
    // (except for cache performance).
    // TODO: Better arrangement?
    PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg);
    PetscInt nodetoinsert;

    nodetoinsert = 0;
    k = 0;

    // debug
    if (workfc.myrank == rankdump) {
      printf("myrank,i,iownnodes,nshg %d %d %d %d \n",
             workfc.myrank, i, iownnodes, nshg);
      printf("myrank,mbeg,mend %d %lld %lld \n", workfc.myrank, mbeg, mend);
    }

    if (workfc.numpe > 1) {
      for (i = 0; i < nshg; i++) {
        nodetoinsert = fncorp[i]-1;
        // debug
        if (workfc.myrank == rankdump) {
          printf("myrank,i,nodetoinsert %d %d %lld \n",
                 workfc.myrank, i, (long long) nodetoinsert);
        }
        indexsetarys[k] = nodetoinsert;
        k = k+1;
      }
    }
    else {
      for (i = 0; i < nshg; i++) {
        nodetoinsert = i;
        indexsetarys[k] = nodetoinsert;
        k = k+1;
      }
    }

    // Create Vector Index Maps
    petsc_n = (PetscInt) nshg;
    ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys,
                           PETSC_COPY_VALUES, &LocalIndexSets);
    free(indexsetarys);
  }
  if (genpar.lhs == 1) {
    ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY);
    ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY);
  }
  if (firstpetsccalls == 1) {
    ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol);
    ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs);
    ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs);
  }
  ierr = VecZeroEntries(resPs);
  if (firstpetsccalls == 1) {
    ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals);
  }

  // Scatter the scalar residual into the distributed PETSc vector;
  // shared interface nodes accumulate via ADD_VALUES.
  PetscRow = 0;
  for (i = 0; i < nshg; i++) {
    valtoinsert = res[i];
    if (workfc.numpe > 1) {
      PetscRow = (fncorp[i]-1);
    }
    else {
      PetscRow = i;
    }
    // NOTE(review): fncorp is read by the asserts even in serial -- confirm.
    assert(fncorp[i] <= nshgt);
    assert(fncorp[i] > 0);
    assert(PetscRow >= 0);
    assert(PetscRow <= nshgt);
    ierr = VecSetValue(resPs, PetscRow, valtoinsert, ADD_VALUES);
  }
  ierr = VecAssemblyBegin(resPs);
  ierr = VecAssemblyEnd(resPs);
  ierr = VecNorm(resPs, NORM_2, &resNrm);

  if (firstpetsccalls == 1) {
    ierr = KSPCreate(PETSC_COMM_WORLD, &ksps);
    ierr = KSPSetOperators(ksps, lhsPs, lhsPs);
    ierr = KSPGetPC(ksps, &pcs);
    ierr = PCSetType(pcs, PCPBJACOBI);
    PetscInt maxits;
    maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace;
    ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT,
                            maxits);
    ierr = KSPSetFromOptions(ksps);
  }
  ierr = KSPSolve(ksps, resPs, DyPs);
  if (firstpetsccalls == 1) {
    ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, NULL, &scatter7s);
  }
  ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
  ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
  // Copy the local solution back into Dy.
  ierr = VecGetArray(DyPLocals, &parray);
  PetscRow = 0;
  for (node = 0; node < nshg; node++) {
    Dy[node] = parray[PetscRow];
    PetscRow = PetscRow + 1;
  }
  ierr = VecRestoreArray(DyPLocals, &parray);

  firstpetsccalls = 0;

  // .... output the statistics
  //
  // itrpar.iKss=0; // see rstat()
  PetscInt its;
  ierr = KSPGetIterationNumber(ksps, &its);
  itrpar.iKss = (int) its;
  itrpar.ntotGMs += (int) its;
  rstatpSclr(&resNrm);
  //
  // .... end
  //
}
#endif