1*10167291SKenneth E. Jansen #include <stdio.h> 2*10167291SKenneth E. Jansen 3*10167291SKenneth E. Jansen #include "petscsys.h" 4*10167291SKenneth E. Jansen #include "petscvec.h" 5*10167291SKenneth E. Jansen #include "petscmat.h" 6*10167291SKenneth E. Jansen #include "petscpc.h" 7*10167291SKenneth E. Jansen #include "petscksp.h" 8*10167291SKenneth E. Jansen 9*10167291SKenneth E. Jansen #include "assert.h" 10*10167291SKenneth E. Jansen #include "common_c.h" 11*10167291SKenneth E. Jansen 12*10167291SKenneth E. Jansen #include <FCMangle.h> 13*10167291SKenneth E. Jansen #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP) 14*10167291SKenneth E. Jansen #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR) 15*10167291SKenneth E. Jansen #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC) 16*10167291SKenneth E. Jansen #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR) 17*10167291SKenneth E. Jansen #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT) 18*10167291SKenneth E. Jansen #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR) 19*10167291SKenneth E. Jansen #define min(a,b) (((a) < (b)) ? (a) : (b)) 20*10167291SKenneth E. Jansen #define max(a,b) (((a) > (b)) ? (a) : (b)) 21*10167291SKenneth E. Jansen #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME) 22*10167291SKenneth E. Jansen #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF) 23*10167291SKenneth E. Jansen 24*10167291SKenneth E. Jansen typedef long long int gcorp_t; 25*10167291SKenneth E. Jansen 26*10167291SKenneth E. Jansen void get_time(uint64_t* rv, uint64_t* cycle); 27*10167291SKenneth E. Jansen void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl); 28*10167291SKenneth E. Jansen 29*10167291SKenneth E. Jansen //#include "auxmpi.h" 30*10167291SKenneth E. Jansen // static PetscOffset poff; 31*10167291SKenneth E. Jansen 32*10167291SKenneth E. Jansen static Mat lhsP; 33*10167291SKenneth E. Jansen static PC pc; 34*10167291SKenneth E. Jansen static KSP ksp; 35*10167291SKenneth E. Jansen static Vec DyP, resP, DyPLocal, resPGlobal; 36*10167291SKenneth E. Jansen static PetscErrorCode ierr; 37*10167291SKenneth E. Jansen static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol; 38*10167291SKenneth E. Jansen static IS LocalIndexSet, GlobalIndexSet; 39*10167291SKenneth E. Jansen static ISLocalToGlobalMapping VectorMapping; 40*10167291SKenneth E. Jansen static ISLocalToGlobalMapping GblVectorMapping; 41*10167291SKenneth E. Jansen static VecScatter scatter7; 42*10167291SKenneth E. Jansen static int firstpetsccall = 1; 43*10167291SKenneth E. Jansen 44*10167291SKenneth E. Jansen static Mat lhsPs; 45*10167291SKenneth E. Jansen static PC pcs; 46*10167291SKenneth E. Jansen static KSP ksps; 47*10167291SKenneth E. Jansen static Vec DyPs, resPs, DyPLocals, resPGlobals; 48*10167291SKenneth E. Jansen static IS LocalIndexSets, GlobalIndexSets; 49*10167291SKenneth E. Jansen static ISLocalToGlobalMapping VectorMappings; 50*10167291SKenneth E. Jansen static ISLocalToGlobalMapping GblVectorMappings; 51*10167291SKenneth E. Jansen static VecScatter scatter7s; 52*10167291SKenneth E. Jansen static int firstpetsccalls = 1; 53*10167291SKenneth E. Jansen 54*10167291SKenneth E. Jansen void SolGMRp(double* y, double* ac, double* yold, 55*10167291SKenneth E. Jansen double* x, int* iBC, double* BC, 56*10167291SKenneth E. Jansen int* col, int* row, double* lhsk, 57*10167291SKenneth E. Jansen double* res, double* BDiag, 58*10167291SKenneth E. Jansen int* iper, 59*10167291SKenneth E. Jansen int* ilwork, double* shp, double* shgl, double* shpb, 60*10167291SKenneth E. Jansen double* shglb, double* Dy, double* rerr, long long int* fncorp) 61*10167291SKenneth E. Jansen { 62*10167291SKenneth E. Jansen // 63*10167291SKenneth E. Jansen // ---------------------------------------------------------------------- 64*10167291SKenneth E. Jansen // 65*10167291SKenneth E. Jansen // This is the preconditioned GMRES driver routine. 66*10167291SKenneth E. Jansen // 67*10167291SKenneth E. Jansen // input: 68*10167291SKenneth E. Jansen // y (nshg,ndof) : Y-variables at n+alpha_v 69*10167291SKenneth E. Jansen // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 70*10167291SKenneth E. Jansen // yold (nshg,ndof) : Y-variables at beginning of step 71*10167291SKenneth E. Jansen // acold (nshg,ndof) : Primvar. accel. variable at begng step 72*10167291SKenneth E. Jansen // x (numnp,nsd) : node coordinates 73*10167291SKenneth E. Jansen // iBC (nshg) : BC codes 74*10167291SKenneth E. Jansen // BC (nshg,ndofBC) : BC constraint parameters 75*10167291SKenneth E. Jansen // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 76*10167291SKenneth E. Jansen // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 77*10167291SKenneth E. Jansen // 78*10167291SKenneth E. Jansen // output: 79*10167291SKenneth E. Jansen // res (nshg,nflow) : preconditioned residual 80*10167291SKenneth E. Jansen // BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 81*10167291SKenneth E. Jansen // 82*10167291SKenneth E. Jansen // 83*10167291SKenneth E. Jansen // Zdenek Johan, Winter 1991. (Fortran 90) 84*10167291SKenneth E. Jansen // ---------------------------------------------------------------------- 85*10167291SKenneth E. Jansen // 86*10167291SKenneth E. Jansen 87*10167291SKenneth E. Jansen 88*10167291SKenneth E. Jansen // Get variables from common_c.h 89*10167291SKenneth E. Jansen int nshg, nflow, nsd, iownnodes; 90*10167291SKenneth E. Jansen nshg = conpar.nshg; 91*10167291SKenneth E. Jansen nflow = conpar.nflow; 92*10167291SKenneth E. Jansen nsd = NSD; 93*10167291SKenneth E. Jansen iownnodes = conpar.iownnodes; 94*10167291SKenneth E. Jansen 95*10167291SKenneth E. Jansen gcorp_t nshgt; 96*10167291SKenneth E. Jansen gcorp_t mbeg; 97*10167291SKenneth E. Jansen gcorp_t mend; 98*10167291SKenneth E. Jansen nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 99*10167291SKenneth E. Jansen mbeg = (gcorp_t) newdim.minowned; 100*10167291SKenneth E. Jansen mend = (gcorp_t) newdim.maxowned; 101*10167291SKenneth E. Jansen 102*10167291SKenneth E. Jansen 103*10167291SKenneth E. Jansen int node, element, var, eqn; 104*10167291SKenneth E. Jansen double valtoinsert; 105*10167291SKenneth E. Jansen int nenl, iel, lelCat, lcsyst, iorder, nshl; 106*10167291SKenneth E. Jansen int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 107*10167291SKenneth E. Jansen double DyFlat[nshg*nflow]; 108*10167291SKenneth E. Jansen double DyFlatPhasta[nshg*nflow]; 109*10167291SKenneth E. Jansen double rmes[nshg*nflow]; 110*10167291SKenneth E. Jansen // DEBUG 111*10167291SKenneth E. Jansen int i,j,k,l,m; 112*10167291SKenneth E. Jansen //int indexsetary[nflow*nshg]; 113*10167291SKenneth E. Jansen //int gblindexsetary[nflow*nshg]; 114*10167291SKenneth E. Jansen PetscInt indexsetary[nflow*nshg]; 115*10167291SKenneth E. Jansen PetscInt gblindexsetary[nflow*nshg]; 116*10167291SKenneth E. Jansen PetscInt nodetoinsert; 117*10167291SKenneth E. Jansen 118*10167291SKenneth E. Jansen // FIXME: PetscScalar 119*10167291SKenneth E. Jansen double real_rtol, real_abstol, real_dtol; 120*10167291SKenneth E. Jansen // /DEBUG 121*10167291SKenneth E. Jansen //double parray[1]; // Should be a PetscScalar 122*10167291SKenneth E. Jansen double *parray; // Should be a PetscScalar 123*10167291SKenneth E. Jansen PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 124*10167291SKenneth E. Jansen PetscInt petsc_n; 125*10167291SKenneth E. Jansen PetscOne = 1; 126*10167291SKenneth E. Jansen uint64_t duration[4]; 127*10167291SKenneth E. Jansen 128*10167291SKenneth E. Jansen // printf("%g %g %g \n", *rerr, *(rerr+1) , *(rerr+2)); 129*10167291SKenneth E. Jansen 130*10167291SKenneth E. Jansen if(firstpetsccall == 1) { 131*10167291SKenneth E. Jansen /* call get_time(duration(1),duration(2)); 132*10167291SKenneth E. Jansen call system('sleep 10'); 133*10167291SKenneth E. Jansen call get_time(duration(3),duration(4)); 134*10167291SKenneth E. Jansen call get_max_time_diff(duration(1), duration(3), 135*10167291SKenneth E. Jansen duration(2), duration(4), 136*10167291SKenneth E. Jansen "TenSecondSleep " // char(0)) 137*10167291SKenneth E. Jansen if(myrank .eq. 0) { 138*10167291SKenneth E. Jansen !write(*,"(A, E10.3)") "TenSecondSleep ", elapsed 139*10167291SKenneth E. Jansen } 140*10167291SKenneth E. Jansen */ 141*10167291SKenneth E. Jansen } 142*10167291SKenneth E. Jansen // 143*10167291SKenneth E. Jansen // 144*10167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<****************** 145*10167291SKenneth E. Jansen // 146*10167291SKenneth E. Jansen // 147*10167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations 148*10167291SKenneth E. Jansen // 149*10167291SKenneth E. Jansen // 150*10167291SKenneth E. Jansen genpar.idflx = 0 ; 151*10167291SKenneth E. Jansen if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 152*10167291SKenneth E. Jansen if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 153*10167291SKenneth E. Jansen 154*10167291SKenneth E. Jansen 155*10167291SKenneth E. Jansen 156*10167291SKenneth E. Jansen PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 157*10167291SKenneth E. Jansen PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 158*10167291SKenneth E. Jansen gcorp_t glbNZ; 159*10167291SKenneth E. Jansen 160*10167291SKenneth E. Jansen if(firstpetsccall == 1) { 161*10167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 162*10167291SKenneth E. Jansen idiagnz[i]=0; 163*10167291SKenneth E. Jansen iodiagnz[i]=0; 164*10167291SKenneth E. Jansen } 165*10167291SKenneth E. Jansen i=0; 166*10167291SKenneth E. Jansen for(k=0;k<nshg;k++) { 167*10167291SKenneth E. Jansen if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 168*10167291SKenneth E. Jansen // this node is not owned by this rank so we skip 169*10167291SKenneth E. Jansen } else { 170*10167291SKenneth E. Jansen for(j=col[i]-1;j<col[i+1]-1;j++) { 171*10167291SKenneth E. Jansen glbNZ=fncorp[row[j]]; 172*10167291SKenneth E. Jansen if((glbNZ < mbeg) || (glbNZ > mend)) { 173*10167291SKenneth E. Jansen iodiagnz[i]++; 174*10167291SKenneth E. Jansen } else { 175*10167291SKenneth E. Jansen idiagnz[i]++; 176*10167291SKenneth E. Jansen } 177*10167291SKenneth E. Jansen } 178*10167291SKenneth E. Jansen i++; 179*10167291SKenneth E. Jansen } 180*10167291SKenneth E. Jansen } 181*10167291SKenneth E. Jansen gcorp_t mind=1000; 182*10167291SKenneth E. Jansen gcorp_t mino=1000; 183*10167291SKenneth E. Jansen gcorp_t maxd=0; 184*10167291SKenneth E. Jansen gcorp_t maxo=0; 185*10167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 186*10167291SKenneth E. Jansen mind=min(mind,idiagnz[i]); 187*10167291SKenneth E. Jansen mino=min(mino,iodiagnz[i]); 188*10167291SKenneth E. Jansen maxd=max(maxd,idiagnz[i]); 189*10167291SKenneth E. Jansen maxo=max(maxo,iodiagnz[i]); 190*10167291SKenneth E. Jansen // iodiagnz[i]=max(iodiagnz[i],10); 191*10167291SKenneth E. Jansen // idiagnz[i]=max(idiagnz[i],10); 192*10167291SKenneth E. Jansen // iodiagnz[i]=2*iodiagnz[i]; // estimate a bit higher for off-part interactions 193*10167291SKenneth E. Jansen // idiagnz[i]=2*idiagnz[i]; // estimate a bit higher for off-part interactions 194*10167291SKenneth E. Jansen } 195*10167291SKenneth E. Jansen // the above was pretty good but below is faster and not too much more memory...of course once you do this 196*10167291SKenneth E. Jansen // could just use the constant fill parameters in create but keep it alive for potential later optimization 197*10167291SKenneth E. Jansen 198*10167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 199*10167291SKenneth E. Jansen iodiagnz[i]=1.3*maxd; 200*10167291SKenneth E. Jansen idiagnz[i]=1.3*maxd; 201*10167291SKenneth E. Jansen } 202*10167291SKenneth E. Jansen 203*10167291SKenneth E. Jansen 204*10167291SKenneth E. Jansen 205*10167291SKenneth E. Jansen if(workfc.numpe < 200){ 206*10167291SKenneth E. Jansen printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 207*10167291SKenneth E. Jansen printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo); 208*10167291SKenneth E. Jansen } 209*10167291SKenneth E. Jansen // Print debug info 210*10167291SKenneth E. Jansen if(nshgt < 200){ 211*10167291SKenneth E. Jansen int irank; 212*10167291SKenneth E. Jansen for(irank=0;irank<workfc.numpe;irank++) { 213*10167291SKenneth E. Jansen if(irank == workfc.myrank){ 214*10167291SKenneth E. Jansen printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank); 215*10167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 216*10167291SKenneth E. Jansen printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]); 217*10167291SKenneth E. Jansen } 218*10167291SKenneth E. Jansen } 219*10167291SKenneth E. Jansen MPI_Barrier(MPI_COMM_WORLD); 220*10167291SKenneth E. Jansen } 221*10167291SKenneth E. Jansen } 222*10167291SKenneth E. Jansen } 223*10167291SKenneth E. Jansen if(firstpetsccall == 1) { 224*10167291SKenneth E. Jansen 225*10167291SKenneth E. Jansen petsc_bs = (PetscInt) nflow; 226*10167291SKenneth E. Jansen petsc_m = (PetscInt) nflow* (PetscInt) iownnodes; 227*10167291SKenneth E. Jansen petsc_M = (PetscInt) nshgt * (PetscInt) nflow; 228*10167291SKenneth E. Jansen petsc_PA = (PetscInt) 40; 229*10167291SKenneth E. Jansen // TODO: fixe nshgt for 64 bit integer!!!!! 230*10167291SKenneth E. Jansen 231*10167291SKenneth E. Jansen //ierr = MatCreateBAIJ(PETSC_COMM_WORLD, nflow, nflow*iownnodes, 232*10167291SKenneth E. Jansen // nflow*iownnodes, nshgt*nflow, nshgt*nflow, 0, 233*10167291SKenneth E. Jansen 234*10167291SKenneth E. Jansen // this has no pre-allocate ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 235*10167291SKenneth E. Jansen // this has no pre-allocate 0, NULL, 0, NULL, &lhsP); 236*10167291SKenneth E. Jansen 237*10167291SKenneth E. Jansen // next 2 do pre-allocate 238*10167291SKenneth E. Jansen 239*10167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 240*10167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before matCreateBAIJ petsc_bs,petsc_m,petsc_M,petsc_PA %ld %ld %ld %ld \n",petsc_bs,petsc_m,petsc_M,petsc_PA); 241*10167291SKenneth E. Jansen 242*10167291SKenneth E. Jansen ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 243*10167291SKenneth E. Jansen 0, idiagnz, 0, iodiagnz, &lhsP); 244*10167291SKenneth E. Jansen 245*10167291SKenneth E. Jansen // petsc_PA, NULL, petsc_PA, NULL, &lhsP); 246*10167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 247*10167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before matSetOoption 1 \n"); 248*10167291SKenneth E. Jansen ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 249*10167291SKenneth E. Jansen 250*10167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 251*10167291SKenneth E. Jansen ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 252*10167291SKenneth E. Jansen ierr = MatSetUp(lhsP); 253*10167291SKenneth E. Jansen 254*10167291SKenneth E. Jansen PetscInt myMatStart, myMatEnd; 255*10167291SKenneth E. Jansen ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 256*10167291SKenneth E. Jansen } 257*10167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 258*10167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 259*10167291SKenneth E. Jansen if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP); 260*10167291SKenneth E. Jansen 261*10167291SKenneth E. Jansen get_time(duration, (duration+1)); 262*10167291SKenneth E. Jansen 263*10167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 264*10167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before elmgmr \n"); 265*10167291SKenneth E. Jansen // for (i=0; i<nshg ; i++) { 266*10167291SKenneth E. Jansen // assert(fncorp[i]>0); 267*10167291SKenneth E. Jansen // } 268*10167291SKenneth E. Jansen 269*10167291SKenneth E. Jansen ElmGMRPETSc(y, ac, x, 270*10167291SKenneth E. Jansen shp, shgl, iBC, 271*10167291SKenneth E. Jansen BC, shpb, 272*10167291SKenneth E. Jansen shglb, res, 273*10167291SKenneth E. Jansen rmes, iper, 274*10167291SKenneth E. Jansen ilwork, rerr , &lhsP); 275*10167291SKenneth E. Jansen get_time((duration+2), (duration+3)); 276*10167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 277*10167291SKenneth E. Jansen (duration+1), (duration+3), 278*10167291SKenneth E. Jansen "ElmGMRPETSc \0"); // char(0)) 279*10167291SKenneth E. Jansen // printf("after ElmGMRPETSc\n"); 280*10167291SKenneth E. Jansen if(firstpetsccall == 1) { 281*10167291SKenneth E. Jansen // Setup IndexSet. For now, we mimic vector insertion procedure 282*10167291SKenneth E. Jansen // Since we always reference by global indexes this doesn't matter 283*10167291SKenneth E. Jansen // except for cache performance) 284*10167291SKenneth E. Jansen // TODO: Better arrangment? 285*10167291SKenneth E. Jansen nodetoinsert = 0; 286*10167291SKenneth E. Jansen k=0; 287*10167291SKenneth E. Jansen if(workfc.numpe > 1) { 288*10167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 289*10167291SKenneth E. Jansen nodetoinsert = fncorp[i]-1; 290*10167291SKenneth E. Jansen // if(nodetoinsert<0) printf("nodetoinsert <0! myrank: %d, value: %ld\n", workfc.myrank, nodetoinsert); 291*10167291SKenneth E. Jansen // assert(fncorp[i]>=0); 292*10167291SKenneth E. Jansen for (j=1; j<=nflow; j++) { 293*10167291SKenneth E. Jansen indexsetary[k] = nodetoinsert*nflow+(j-1); 294*10167291SKenneth E. Jansen assert(indexsetary[k]>=0); 295*10167291SKenneth E. Jansen // assert(fncorp[i]>=0); 296*10167291SKenneth E. Jansen k = k+1; 297*10167291SKenneth E. Jansen } 298*10167291SKenneth E. Jansen } 299*10167291SKenneth E. Jansen } 300*10167291SKenneth E. Jansen else { 301*10167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 302*10167291SKenneth E. Jansen nodetoinsert = i; 303*10167291SKenneth E. Jansen for (j=1; j<=nflow; j++) { 304*10167291SKenneth E. Jansen indexsetary[k] = nodetoinsert*nflow+(j-1); 305*10167291SKenneth E. Jansen k = k+1; 306*10167291SKenneth E. Jansen } 307*10167291SKenneth E. Jansen } 308*10167291SKenneth E. Jansen } 309*10167291SKenneth E. Jansen 310*10167291SKenneth E. Jansen for (i=0; i<nshg*nflow; i++) { 311*10167291SKenneth E. Jansen gblindexsetary[i] = i ; //TODO: better option for performance? 312*10167291SKenneth E. Jansen } 313*10167291SKenneth E. Jansen // Create Vector Index Maps 314*10167291SKenneth E. Jansen petsc_n = (PetscInt) nshg * (PetscInt) nflow; 315*10167291SKenneth E. Jansen //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, indexsetary, 316*10167291SKenneth E. Jansen // PETSC_COPY_VALUES, &LocalIndexSet); 317*10167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary, 318*10167291SKenneth E. Jansen PETSC_COPY_VALUES, &LocalIndexSet); 319*10167291SKenneth E. Jansen // printf("after 1st ISCreateGeneral %d\n", ierr); 320*10167291SKenneth E. Jansen //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, gblindexsetary, 321*10167291SKenneth E. Jansen // PETSC_COPY_VALUES, &GlobalIndexSet); 322*10167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetary, 323*10167291SKenneth E. Jansen PETSC_COPY_VALUES, &GlobalIndexSet); 324*10167291SKenneth E. Jansen // printf("after 2nd ISCreateGeneral %d\n", ierr); 325*10167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSet,&VectorMapping); 326*10167291SKenneth E. Jansen // printf("after 1st ISLocalToGlobalMappingCreateIS %d\n",ierr); 327*10167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSet,&GblVectorMapping); 328*10167291SKenneth E. Jansen // printf("after 2nd ISLocalToGlobalMappingCreateIS %d\n",ierr); 329*10167291SKenneth E. Jansen } 330*10167291SKenneth E. Jansen if(genpar.lhs == 1) { 331*10167291SKenneth E. Jansen get_time((duration), (duration+1)); 332*10167291SKenneth E. Jansen ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY); 333*10167291SKenneth E. Jansen ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY); 334*10167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 335*10167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 336*10167291SKenneth E. Jansen (duration+1), (duration+3), 337*10167291SKenneth E. Jansen "MatAssembly \0"); // char(0)) 338*10167291SKenneth E. Jansen get_time(duration, (duration+1)); 339*10167291SKenneth E. Jansen } 340*10167291SKenneth E. Jansen if(firstpetsccall == 1) { 341*10167291SKenneth E. Jansen ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol); 342*10167291SKenneth E. Jansen // TODO Should this be LocalRow? LocalCol? or does LocalRow always == LocalCol 343*10167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resP); 344*10167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP); 345*10167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &DyP); 346*10167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP); 347*10167291SKenneth E. Jansen } 348*10167291SKenneth E. Jansen ierr = VecZeroEntries(resP); 349*10167291SKenneth E. Jansen ierr = VecZeroEntries(DyP); 350*10167291SKenneth E. Jansen if(firstpetsccall == 1) { 351*10167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resPGlobal); 352*10167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobal); 353*10167291SKenneth E. Jansen //ierr = VecCreateSeq(PETSC_COMM_SELF, nshg*nflow, &DyPLocal); 354*10167291SKenneth E. Jansen ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal); 355*10167291SKenneth E. Jansen } 356*10167291SKenneth E. Jansen ierr = VecZeroEntries(resPGlobal); 357*10167291SKenneth E. Jansen // call VecZeroEntries(DyPLocal, ierr) 358*10167291SKenneth E. Jansen 359*10167291SKenneth E. Jansen PetscRow=0; 360*10167291SKenneth E. Jansen k = 0; 361*10167291SKenneth E. Jansen int index; 362*10167291SKenneth E. Jansen for (i=0; i<nshg; i++ ){ 363*10167291SKenneth E. Jansen for (j = 1; j<=nflow; j++){ 364*10167291SKenneth E. Jansen index = i + (j-1)*nshg; 365*10167291SKenneth E. Jansen valtoinsert = res[index]; 366*10167291SKenneth E. Jansen if(workfc.numpe > 1) { 367*10167291SKenneth E. Jansen PetscRow = (fncorp[i]-1)*nflow+(j-1); 368*10167291SKenneth E. Jansen } 369*10167291SKenneth E. Jansen else { 370*10167291SKenneth E. Jansen PetscRow = i*nflow+(j-1); 371*10167291SKenneth E. Jansen } 372*10167291SKenneth E. Jansen assert(fncorp[i]<=nshgt); 373*10167291SKenneth E. Jansen assert(fncorp[i]>0); 374*10167291SKenneth E. Jansen assert(PetscRow>=0); 375*10167291SKenneth E. Jansen assert(PetscRow<=nshgt*nflow); 376*10167291SKenneth E. Jansen ierr = VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES); 377*10167291SKenneth E. Jansen } 378*10167291SKenneth E. Jansen } 379*10167291SKenneth E. Jansen // printf("after VecSetValue loop %d\n",ierr); 380*10167291SKenneth E. Jansen ierr = VecAssemblyBegin(resP); 381*10167291SKenneth E. Jansen ierr = VecAssemblyEnd(resP); 382*10167291SKenneth E. Jansen get_time((duration+2), (duration+3)); 383*10167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 384*10167291SKenneth E. Jansen (duration+1), (duration+3), 385*10167291SKenneth E. Jansen "VectorWorkPre \0"); // char(0)) 386*10167291SKenneth E. Jansen 387*10167291SKenneth E. Jansen get_time((duration),(duration+1)); 388*10167291SKenneth E. Jansen 389*10167291SKenneth E. Jansen if(firstpetsccall == 1) { 390*10167291SKenneth E. Jansen ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); 391*10167291SKenneth E. Jansen ierr = KSPSetOperators(ksp, lhsP, lhsP); 392*10167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksp, lhsP, lhsP, SAME_NONZERO_PATTERN); 393*10167291SKenneth E. Jansen ierr = KSPGetPC(ksp, &pc); 394*10167291SKenneth E. Jansen ierr = PCSetType(pc, PCPBJACOBI); 395*10167291SKenneth E. Jansen PetscInt maxits; 396*10167291SKenneth E. Jansen maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 397*10167291SKenneth E. Jansen //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 398*10167291SKenneth E. Jansen ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 399*10167291SKenneth E. Jansen ierr = KSPSetFromOptions(ksp); 400*10167291SKenneth E. Jansen } 401*10167291SKenneth E. Jansen ierr = KSPSolve(ksp, resP, DyP); 402*10167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 403*10167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 404*10167291SKenneth E. Jansen (duration+1), (duration+3), 405*10167291SKenneth E. Jansen "KSPSolve \0"); // char(0)) 406*10167291SKenneth E. Jansen //if(myrank .eq. 0) then 407*10167291SKenneth E. Jansen //!write(*,"(A, E10.3)") "KSPSolve ", elapsed 408*10167291SKenneth E. Jansen //end if 409*10167291SKenneth E. Jansen get_time((duration),(duration+1)); 410*10167291SKenneth E. Jansen if(firstpetsccall == 1) { 411*10167291SKenneth E. Jansen ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, GlobalIndexSet, &scatter7); 412*10167291SKenneth E. Jansen } 413*10167291SKenneth E. Jansen ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 414*10167291SKenneth E. Jansen ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 415*10167291SKenneth E. Jansen ierr = VecGetArray(DyPLocal, &parray); 416*10167291SKenneth E. Jansen PetscRow = 0; 417*10167291SKenneth E. Jansen for ( node = 0; node< nshg; node++) { 418*10167291SKenneth E. Jansen for (var = 1; var<= nflow; var++) { 419*10167291SKenneth E. Jansen index = node + (var-1)*nshg; 420*10167291SKenneth E. Jansen Dy[index] = parray[PetscRow]; 421*10167291SKenneth E. Jansen PetscRow = PetscRow+1; 422*10167291SKenneth E. Jansen } 423*10167291SKenneth E. Jansen } 424*10167291SKenneth E. Jansen ierr = VecRestoreArray(DyPLocal, &parray); 425*10167291SKenneth E. Jansen 426*10167291SKenneth E. Jansen firstpetsccall = 0; 427*10167291SKenneth E. Jansen // later timer ('Solver '); 428*10167291SKenneth E. Jansen 429*10167291SKenneth E. Jansen // .... output the statistics 430*10167291SKenneth E. Jansen // 431*10167291SKenneth E. Jansen itrpar.iKs=0; // see rstat() 432*10167291SKenneth E. Jansen PetscInt its; 433*10167291SKenneth E. Jansen //ierr = KSPGetIterationNumber(ksp, &itrpar.iKs); 434*10167291SKenneth E. Jansen ierr = KSPGetIterationNumber(ksp, &its); 435*10167291SKenneth E. Jansen itrpar.iKs = (int) its; 436*10167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 437*10167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 438*10167291SKenneth E. Jansen (duration+1), (duration+3), 439*10167291SKenneth E. Jansen "solWork \0"); // char(0)) 440*10167291SKenneth E. Jansen itrpar.ntotGM += (int) its; 441*10167291SKenneth E. Jansen rstat (res, ilwork); 442*10167291SKenneth E. Jansen // 443*10167291SKenneth E. Jansen // .... stop the timer 444*10167291SKenneth E. Jansen // 445*10167291SKenneth E. Jansen // 3002 continue ! no solve just res. 446*10167291SKenneth E. Jansen // later call timer ('Back ') 447*10167291SKenneth E. Jansen // 448*10167291SKenneth E. Jansen // .... end 449*10167291SKenneth E. Jansen // 450*10167291SKenneth E. Jansen } 451*10167291SKenneth E. Jansen 452*10167291SKenneth E. Jansen void SolGMRpSclr(double* y, double* ac, 453*10167291SKenneth E. Jansen double* x, double* elDw, int* iBC, double* BC, 454*10167291SKenneth E. Jansen int* col, int* row, 455*10167291SKenneth E. Jansen int* iper, 456*10167291SKenneth E. Jansen int* ilwork, double* shp, double* shgl, double* shpb, 457*10167291SKenneth E. Jansen double* shglb, double* res, double* Dy, long long int* fncorp) 458*10167291SKenneth E. Jansen { 459*10167291SKenneth E. Jansen // 460*10167291SKenneth E. Jansen // ---------------------------------------------------------------------- 461*10167291SKenneth E. Jansen // 462*10167291SKenneth E. Jansen // This is the preconditioned GMRES driver routine. 463*10167291SKenneth E. Jansen // 464*10167291SKenneth E. Jansen // input: 465*10167291SKenneth E. Jansen // y (nshg,ndof) : Y-variables at n+alpha_v 466*10167291SKenneth E. Jansen // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 467*10167291SKenneth E. Jansen // yold (nshg,ndof) : Y-variables at beginning of step 468*10167291SKenneth E. Jansen // acold (nshg,ndof) : Primvar. accel. variable at begng step 469*10167291SKenneth E. Jansen // x (numnp,nsd) : node coordinates 470*10167291SKenneth E. Jansen // iBC (nshg) : BC codes 471*10167291SKenneth E. Jansen // BC (nshg,ndofBC) : BC constraint parameters 472*10167291SKenneth E. Jansen // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 473*10167291SKenneth E. Jansen // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 474*10167291SKenneth E. Jansen // 475*10167291SKenneth E. Jansen // ---------------------------------------------------------------------- 476*10167291SKenneth E. Jansen // 477*10167291SKenneth E. Jansen 478*10167291SKenneth E. Jansen 479*10167291SKenneth E. Jansen // Get variables from common_c.h 480*10167291SKenneth E. Jansen int nshg, nflow, nsd, iownnodes; 481*10167291SKenneth E. Jansen nshg = conpar.nshg; 482*10167291SKenneth E. Jansen nsd = NSD; 483*10167291SKenneth E. Jansen iownnodes = conpar.iownnodes; 484*10167291SKenneth E. Jansen 485*10167291SKenneth E. Jansen gcorp_t nshgt; 486*10167291SKenneth E. Jansen gcorp_t mbeg; 487*10167291SKenneth E. Jansen gcorp_t mend; 488*10167291SKenneth E. Jansen nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 489*10167291SKenneth E. Jansen mbeg = (gcorp_t) newdim.minowned; 490*10167291SKenneth E. Jansen mend = (gcorp_t) newdim.maxowned; 491*10167291SKenneth E. Jansen 492*10167291SKenneth E. Jansen 493*10167291SKenneth E. Jansen int node, element, var, eqn; 494*10167291SKenneth E. Jansen double valtoinsert; 495*10167291SKenneth E. Jansen int nenl, iel, lelCat, lcsyst, iorder, nshl; 496*10167291SKenneth E. Jansen int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 497*10167291SKenneth E. Jansen double DyFlats[nshg]; 498*10167291SKenneth E. Jansen double DyFlatPhastas[nshg]; 499*10167291SKenneth E. Jansen double rmes[nshg]; 500*10167291SKenneth E. Jansen // DEBUG 501*10167291SKenneth E. Jansen int i,j,k,l,m; 502*10167291SKenneth E. Jansen PetscInt indexsetarys[nshg]; 503*10167291SKenneth E. Jansen PetscInt gblindexsetarys[nshg]; 504*10167291SKenneth E. Jansen PetscInt nodetoinsert; 505*10167291SKenneth E. Jansen 506*10167291SKenneth E. Jansen double real_rtol, real_abstol, real_dtol; 507*10167291SKenneth E. Jansen double *parray; 508*10167291SKenneth E. Jansen PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 509*10167291SKenneth E. Jansen PetscInt petsc_n; 510*10167291SKenneth E. Jansen PetscOne = 1; 511*10167291SKenneth E. Jansen uint64_t duration[4]; 512*10167291SKenneth E. Jansen 513*10167291SKenneth E. Jansen 514*10167291SKenneth E. Jansen // 515*10167291SKenneth E. Jansen // 516*10167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<****************** 517*10167291SKenneth E. Jansen // 518*10167291SKenneth E. Jansen // 519*10167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations 520*10167291SKenneth E. Jansen // 521*10167291SKenneth E. Jansen // 522*10167291SKenneth E. Jansen genpar.idflx = 0 ; 523*10167291SKenneth E. Jansen if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 524*10167291SKenneth E. Jansen if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 525*10167291SKenneth E. Jansen 526*10167291SKenneth E. Jansen 527*10167291SKenneth E. Jansen 528*10167291SKenneth E. Jansen PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 529*10167291SKenneth E. Jansen PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 530*10167291SKenneth E. Jansen gcorp_t glbNZ; 531*10167291SKenneth E. Jansen 532*10167291SKenneth E. Jansen if(firstpetsccalls == 1) { 533*10167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 534*10167291SKenneth E. Jansen idiagnz[i]=0; 535*10167291SKenneth E. Jansen iodiagnz[i]=0; 536*10167291SKenneth E. Jansen } 537*10167291SKenneth E. Jansen i=0; 538*10167291SKenneth E. Jansen for(k=0;k<nshg;k++) { 539*10167291SKenneth E. Jansen if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 540*10167291SKenneth E. Jansen // this node is not owned by this rank so we skip 541*10167291SKenneth E. Jansen } else { 542*10167291SKenneth E. Jansen for(j=col[i]-1;j<col[i+1]-1;j++) { 543*10167291SKenneth E. Jansen glbNZ=fncorp[row[j]]; 544*10167291SKenneth E. Jansen if((glbNZ < mbeg) || (glbNZ > mend)) { 545*10167291SKenneth E. Jansen iodiagnz[i]++; 546*10167291SKenneth E. Jansen } else { 547*10167291SKenneth E. Jansen idiagnz[i]++; 548*10167291SKenneth E. Jansen } 549*10167291SKenneth E. Jansen } 550*10167291SKenneth E. Jansen i++; 551*10167291SKenneth E. Jansen } 552*10167291SKenneth E. Jansen } 553*10167291SKenneth E. Jansen gcorp_t mind=1000; 554*10167291SKenneth E. Jansen gcorp_t mino=1000; 555*10167291SKenneth E. Jansen gcorp_t maxd=0; 556*10167291SKenneth E. Jansen gcorp_t maxo=0; 557*10167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 558*10167291SKenneth E. Jansen mind=min(mind,idiagnz[i]); 559*10167291SKenneth E. Jansen mino=min(mino,iodiagnz[i]); 560*10167291SKenneth E. Jansen maxd=max(maxd,idiagnz[i]); 561*10167291SKenneth E. Jansen maxo=max(maxo,iodiagnz[i]); 562*10167291SKenneth E. Jansen } 563*10167291SKenneth E. Jansen 564*10167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 565*10167291SKenneth E. Jansen iodiagnz[i]=1.3*maxd; 566*10167291SKenneth E. Jansen idiagnz[i]=1.3*maxd; 567*10167291SKenneth E. Jansen } 568*10167291SKenneth E. Jansen 569*10167291SKenneth E. Jansen 570*10167291SKenneth E. Jansen 571*10167291SKenneth E. Jansen } 572*10167291SKenneth E. Jansen if(firstpetsccalls == 1) { 573*10167291SKenneth E. Jansen 574*10167291SKenneth E. Jansen petsc_m = (PetscInt) iownnodes; 575*10167291SKenneth E. Jansen petsc_M = (PetscInt) nshgt; 576*10167291SKenneth E. Jansen petsc_PA = (PetscInt) 40; 577*10167291SKenneth E. Jansen 578*10167291SKenneth E. Jansen ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M, 579*10167291SKenneth E. Jansen 0, idiagnz, 0, iodiagnz, &lhsPs); 580*10167291SKenneth E. Jansen 581*10167291SKenneth E. Jansen ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 582*10167291SKenneth E. Jansen 583*10167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 584*10167291SKenneth E. Jansen ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 585*10167291SKenneth E. Jansen ierr = MatSetUp(lhsPs); 586*10167291SKenneth E. Jansen 587*10167291SKenneth E. Jansen PetscInt myMatStart, myMatEnd; 588*10167291SKenneth E. Jansen ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 589*10167291SKenneth E. Jansen } 590*10167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 591*10167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 592*10167291SKenneth E. Jansen if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs); 593*10167291SKenneth E. Jansen 594*10167291SKenneth E. Jansen get_time(duration, (duration+1)); 595*10167291SKenneth E. Jansen 596*10167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 597*10167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before elmgmr \n"); 598*10167291SKenneth E. Jansen // for (i=0; i<nshg ; i++) { 599*10167291SKenneth E. Jansen // assert(fncorp[i]>0); 600*10167291SKenneth E. Jansen // } 601*10167291SKenneth E. Jansen 602*10167291SKenneth E. Jansen ElmGMRPETScSclr(y, ac, x, elDw, 603*10167291SKenneth E. Jansen shp, shgl, iBC, 604*10167291SKenneth E. Jansen BC, shpb, 605*10167291SKenneth E. Jansen shglb, res, 606*10167291SKenneth E. Jansen rmes, iper, 607*10167291SKenneth E. Jansen ilwork, &lhsPs); 608*10167291SKenneth E. Jansen if(firstpetsccalls == 1) { 609*10167291SKenneth E. Jansen // Setup IndexSet. For now, we mimic vector insertion procedure 610*10167291SKenneth E. Jansen // Since we always reference by global indexes this doesn't matter 611*10167291SKenneth E. Jansen // except for cache performance) 612*10167291SKenneth E. Jansen // TODO: Better arrangment? 613*10167291SKenneth E. Jansen nodetoinsert = 0; 614*10167291SKenneth E. Jansen k=0; 615*10167291SKenneth E. Jansen if(workfc.numpe > 1) { 616*10167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 617*10167291SKenneth E. Jansen nodetoinsert = fncorp[i]-1; 618*10167291SKenneth E. Jansen indexsetarys[k] = nodetoinsert; 619*10167291SKenneth E. Jansen k = k+1; 620*10167291SKenneth E. Jansen } 621*10167291SKenneth E. Jansen } 622*10167291SKenneth E. Jansen else { 623*10167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 624*10167291SKenneth E. Jansen nodetoinsert = i; 625*10167291SKenneth E. Jansen indexsetarys[k] = nodetoinsert; 626*10167291SKenneth E. Jansen k = k+1; 627*10167291SKenneth E. Jansen } 628*10167291SKenneth E. Jansen } 629*10167291SKenneth E. Jansen 630*10167291SKenneth E. Jansen for (i=0; i<nshg; i++) { 631*10167291SKenneth E. Jansen gblindexsetarys[i] = i ; //TODO: better option for performance? 632*10167291SKenneth E. Jansen } 633*10167291SKenneth E. Jansen // Create Vector Index Maps 634*10167291SKenneth E. Jansen petsc_n = (PetscInt) nshg; 635*10167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys, 636*10167291SKenneth E. Jansen PETSC_COPY_VALUES, &LocalIndexSets); 637*10167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetarys, 638*10167291SKenneth E. Jansen PETSC_COPY_VALUES, &GlobalIndexSets); 639*10167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSets,&VectorMappings); 640*10167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSets,&GblVectorMappings); 641*10167291SKenneth E. Jansen } 642*10167291SKenneth E. Jansen if(genpar.lhs ==1) { 643*10167291SKenneth E. Jansen ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY); 644*10167291SKenneth E. Jansen ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY); 645*10167291SKenneth E. Jansen } 646*10167291SKenneth E. Jansen if(firstpetsccalls == 1) { 647*10167291SKenneth E. Jansen ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol); 648*10167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs); 649*10167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs); 650*10167291SKenneth E. Jansen } 651*10167291SKenneth E. Jansen ierr = VecZeroEntries(resPs); 652*10167291SKenneth E. Jansen ierr = VecZeroEntries(DyPs); 653*10167291SKenneth E. Jansen if(firstpetsccalls == 1) { 654*10167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobals); 655*10167291SKenneth E. Jansen ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals); 656*10167291SKenneth E. Jansen } 657*10167291SKenneth E. Jansen ierr = VecZeroEntries(resPGlobals); 658*10167291SKenneth E. Jansen 659*10167291SKenneth E. Jansen PetscRow=0; 660*10167291SKenneth E. Jansen k = 0; 661*10167291SKenneth E. Jansen int index; 662*10167291SKenneth E. Jansen for (i=0; i<nshg; i++ ){ 663*10167291SKenneth E. Jansen valtoinsert = res[i]; 664*10167291SKenneth E. Jansen if(workfc.numpe > 1) { 665*10167291SKenneth E. Jansen PetscRow = (fncorp[i]-1); 666*10167291SKenneth E. Jansen } 667*10167291SKenneth E. Jansen else { 668*10167291SKenneth E. Jansen PetscRow = i-1; 669*10167291SKenneth E. Jansen } 670*10167291SKenneth E. Jansen assert(fncorp[i]<=nshgt); 671*10167291SKenneth E. Jansen assert(fncorp[i]>0); 672*10167291SKenneth E. Jansen assert(PetscRow>=0); 673*10167291SKenneth E. Jansen assert(PetscRow<=nshgt); 674*10167291SKenneth E. Jansen ierr = VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES); 675*10167291SKenneth E. Jansen } 676*10167291SKenneth E. Jansen // printf("after VecSetValue loop %d\n",ierr); 677*10167291SKenneth E. Jansen ierr = VecAssemblyBegin(resPs); 678*10167291SKenneth E. Jansen ierr = VecAssemblyEnd(resPs); 679*10167291SKenneth E. Jansen 680*10167291SKenneth E. Jansen if(firstpetsccalls == 1) { 681*10167291SKenneth E. Jansen ierr = KSPCreate(PETSC_COMM_WORLD, &ksps); 682*10167291SKenneth E. Jansen ierr = KSPSetOperators(ksps, lhsPs, lhsPs); 683*10167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN); 684*10167291SKenneth E. Jansen ierr = KSPGetPC(ksps, &pcs); 685*10167291SKenneth E. Jansen ierr = PCSetType(pcs, PCPBJACOBI); 686*10167291SKenneth E. Jansen PetscInt maxits; 687*10167291SKenneth E. Jansen maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 688*10167291SKenneth E. Jansen //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 689*10167291SKenneth E. Jansen ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 690*10167291SKenneth E. Jansen ierr = KSPSetFromOptions(ksps); 691*10167291SKenneth E. Jansen } 692*10167291SKenneth E. Jansen ierr = KSPSolve(ksps, resPs, DyPs); 693*10167291SKenneth E. Jansen if(firstpetsccalls == 1) { 694*10167291SKenneth E. Jansen ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, GlobalIndexSets, &scatter7s); 695*10167291SKenneth E. Jansen } 696*10167291SKenneth E. Jansen ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 697*10167291SKenneth E. Jansen ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 698*10167291SKenneth E. Jansen ierr = VecGetArray(DyPLocals, &parray); 699*10167291SKenneth E. Jansen PetscRow = 0; 700*10167291SKenneth E. Jansen for ( node = 0; node< nshg; node++) { 701*10167291SKenneth E. Jansen index = node; 702*10167291SKenneth E. Jansen Dy[index] = parray[PetscRow]; 703*10167291SKenneth E. Jansen PetscRow = PetscRow+1; 704*10167291SKenneth E. Jansen } 705*10167291SKenneth E. Jansen ierr = VecRestoreArray(DyPLocals, &parray); 706*10167291SKenneth E. Jansen 707*10167291SKenneth E. Jansen firstpetsccalls = 0; 708*10167291SKenneth E. Jansen 709*10167291SKenneth E. Jansen // .... output the statistics 710*10167291SKenneth E. Jansen // 711*10167291SKenneth E. Jansen // itrpar.iKss=0; // see rstat() 712*10167291SKenneth E. Jansen PetscInt its; 713*10167291SKenneth E. Jansen ierr = KSPGetIterationNumber(ksps, &its); 714*10167291SKenneth E. Jansen itrpar.iKss = (int) its; 715*10167291SKenneth E. Jansen itrpar.ntotGMs += (int) its; 716*10167291SKenneth E. Jansen rstatSclr (res, ilwork); 717*10167291SKenneth E. Jansen // 718*10167291SKenneth E. Jansen // .... end 719*10167291SKenneth E. Jansen // 720*10167291SKenneth E. Jansen } 721*10167291SKenneth E. Jansen 722