117860365SKenneth E. Jansen #ifdef HAVE_PETSC 210167291SKenneth E. Jansen #include <stdio.h> 310167291SKenneth E. Jansen 410167291SKenneth E. Jansen #include "petscsys.h" 510167291SKenneth E. Jansen #include "petscvec.h" 610167291SKenneth E. Jansen #include "petscmat.h" 710167291SKenneth E. Jansen #include "petscpc.h" 810167291SKenneth E. Jansen #include "petscksp.h" 910167291SKenneth E. Jansen 1010167291SKenneth E. Jansen #include "assert.h" 1110167291SKenneth E. Jansen #include "common_c.h" 1210167291SKenneth E. Jansen 1310167291SKenneth E. Jansen #include <FCMangle.h> 1410167291SKenneth E. Jansen #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP) 1510167291SKenneth E. Jansen #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR) 1610167291SKenneth E. Jansen #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC) 1710167291SKenneth E. Jansen #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR) 1810167291SKenneth E. Jansen #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT) 1910167291SKenneth E. Jansen #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR) 2010167291SKenneth E. Jansen #define min(a,b) (((a) < (b)) ? (a) : (b)) 2110167291SKenneth E. Jansen #define max(a,b) (((a) > (b)) ? (a) : (b)) 2210167291SKenneth E. Jansen #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME) 2310167291SKenneth E. Jansen #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF) 2410167291SKenneth E. Jansen 2510167291SKenneth E. Jansen typedef long long int gcorp_t; 2610167291SKenneth E. Jansen 2710167291SKenneth E. Jansen void get_time(uint64_t* rv, uint64_t* cycle); 2810167291SKenneth E. Jansen void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl); 2910167291SKenneth E. Jansen 3010167291SKenneth E. Jansen //#include "auxmpi.h" 3110167291SKenneth E. Jansen // static PetscOffset poff; 3210167291SKenneth E. Jansen 3310167291SKenneth E. Jansen static Mat lhsP; 3410167291SKenneth E. Jansen static PC pc; 3510167291SKenneth E. Jansen static KSP ksp; 3610167291SKenneth E. Jansen static Vec DyP, resP, DyPLocal, resPGlobal; 3710167291SKenneth E. Jansen static PetscErrorCode ierr; 3810167291SKenneth E. Jansen static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol; 3910167291SKenneth E. Jansen static IS LocalIndexSet, GlobalIndexSet; 4010167291SKenneth E. Jansen static ISLocalToGlobalMapping VectorMapping; 4110167291SKenneth E. Jansen static ISLocalToGlobalMapping GblVectorMapping; 4210167291SKenneth E. Jansen static VecScatter scatter7; 4310167291SKenneth E. Jansen static int firstpetsccall = 1; 4410167291SKenneth E. Jansen 4510167291SKenneth E. Jansen static Mat lhsPs; 4610167291SKenneth E. Jansen static PC pcs; 4710167291SKenneth E. Jansen static KSP ksps; 4810167291SKenneth E. Jansen static Vec DyPs, resPs, DyPLocals, resPGlobals; 4910167291SKenneth E. Jansen static IS LocalIndexSets, GlobalIndexSets; 5010167291SKenneth E. Jansen static ISLocalToGlobalMapping VectorMappings; 5110167291SKenneth E. Jansen static ISLocalToGlobalMapping GblVectorMappings; 5210167291SKenneth E. Jansen static VecScatter scatter7s; 5310167291SKenneth E. Jansen static int firstpetsccalls = 1; 5410167291SKenneth E. Jansen 5510167291SKenneth E. Jansen void SolGMRp(double* y, double* ac, double* yold, 5610167291SKenneth E. Jansen double* x, int* iBC, double* BC, 5710167291SKenneth E. Jansen int* col, int* row, double* lhsk, 5810167291SKenneth E. Jansen double* res, double* BDiag, 5910167291SKenneth E. Jansen int* iper, 6010167291SKenneth E. Jansen int* ilwork, double* shp, double* shgl, double* shpb, 6110167291SKenneth E. Jansen double* shglb, double* Dy, double* rerr, long long int* fncorp) 6210167291SKenneth E. Jansen { 6310167291SKenneth E. Jansen // 6410167291SKenneth E. Jansen // ---------------------------------------------------------------------- 6510167291SKenneth E. Jansen // 6610167291SKenneth E. Jansen // This is the preconditioned GMRES driver routine. 6710167291SKenneth E. Jansen // 6810167291SKenneth E. Jansen // input: 6910167291SKenneth E. Jansen // y (nshg,ndof) : Y-variables at n+alpha_v 7010167291SKenneth E. Jansen // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 7110167291SKenneth E. Jansen // yold (nshg,ndof) : Y-variables at beginning of step 7210167291SKenneth E. Jansen // acold (nshg,ndof) : Primvar. accel. variable at begng step 7310167291SKenneth E. Jansen // x (numnp,nsd) : node coordinates 7410167291SKenneth E. Jansen // iBC (nshg) : BC codes 7510167291SKenneth E. Jansen // BC (nshg,ndofBC) : BC constraint parameters 7610167291SKenneth E. Jansen // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 7710167291SKenneth E. Jansen // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 7810167291SKenneth E. Jansen // 7910167291SKenneth E. Jansen // output: 8010167291SKenneth E. Jansen // res (nshg,nflow) : preconditioned residual 8110167291SKenneth E. Jansen // BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 8210167291SKenneth E. Jansen // 8310167291SKenneth E. Jansen // 8410167291SKenneth E. Jansen // Zdenek Johan, Winter 1991. (Fortran 90) 8510167291SKenneth E. Jansen // ---------------------------------------------------------------------- 8610167291SKenneth E. Jansen // 8710167291SKenneth E. Jansen 8810167291SKenneth E. Jansen 8910167291SKenneth E. Jansen // Get variables from common_c.h 9010167291SKenneth E. Jansen int nshg, nflow, nsd, iownnodes; 9110167291SKenneth E. Jansen nshg = conpar.nshg; 9210167291SKenneth E. Jansen nflow = conpar.nflow; 9310167291SKenneth E. Jansen nsd = NSD; 9410167291SKenneth E. Jansen iownnodes = conpar.iownnodes; 9510167291SKenneth E. Jansen 9610167291SKenneth E. Jansen gcorp_t nshgt; 9710167291SKenneth E. Jansen gcorp_t mbeg; 9810167291SKenneth E. Jansen gcorp_t mend; 9910167291SKenneth E. Jansen nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 10010167291SKenneth E. Jansen mbeg = (gcorp_t) newdim.minowned; 10110167291SKenneth E. Jansen mend = (gcorp_t) newdim.maxowned; 10210167291SKenneth E. Jansen 10310167291SKenneth E. Jansen 10410167291SKenneth E. Jansen int node, element, var, eqn; 10510167291SKenneth E. Jansen double valtoinsert; 10610167291SKenneth E. Jansen int nenl, iel, lelCat, lcsyst, iorder, nshl; 10710167291SKenneth E. Jansen int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 10810167291SKenneth E. Jansen double DyFlat[nshg*nflow]; 10910167291SKenneth E. Jansen double DyFlatPhasta[nshg*nflow]; 11010167291SKenneth E. Jansen double rmes[nshg*nflow]; 11110167291SKenneth E. Jansen // DEBUG 11210167291SKenneth E. Jansen int i,j,k,l,m; 11310167291SKenneth E. Jansen //int indexsetary[nflow*nshg]; 11410167291SKenneth E. Jansen //int gblindexsetary[nflow*nshg]; 11510167291SKenneth E. Jansen PetscInt indexsetary[nflow*nshg]; 11610167291SKenneth E. Jansen PetscInt gblindexsetary[nflow*nshg]; 11710167291SKenneth E. Jansen PetscInt nodetoinsert; 11810167291SKenneth E. Jansen 11910167291SKenneth E. Jansen // FIXME: PetscScalar 12010167291SKenneth E. Jansen double real_rtol, real_abstol, real_dtol; 12110167291SKenneth E. Jansen // /DEBUG 12210167291SKenneth E. Jansen //double parray[1]; // Should be a PetscScalar 12310167291SKenneth E. Jansen double *parray; // Should be a PetscScalar 12410167291SKenneth E. Jansen PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 12510167291SKenneth E. Jansen PetscInt petsc_n; 12610167291SKenneth E. Jansen PetscOne = 1; 12710167291SKenneth E. Jansen uint64_t duration[4]; 12810167291SKenneth E. Jansen 12910167291SKenneth E. Jansen // printf("%g %g %g \n", *rerr, *(rerr+1) , *(rerr+2)); 13010167291SKenneth E. Jansen 13110167291SKenneth E. Jansen if(firstpetsccall == 1) { 13210167291SKenneth E. Jansen /* call get_time(duration(1),duration(2)); 13310167291SKenneth E. Jansen call system('sleep 10'); 13410167291SKenneth E. Jansen call get_time(duration(3),duration(4)); 13510167291SKenneth E. Jansen call get_max_time_diff(duration(1), duration(3), 13610167291SKenneth E. Jansen duration(2), duration(4), 13710167291SKenneth E. Jansen "TenSecondSleep " // char(0)) 13810167291SKenneth E. Jansen if(myrank .eq. 0) { 13910167291SKenneth E. Jansen !write(*,"(A, E10.3)") "TenSecondSleep ", elapsed 14010167291SKenneth E. Jansen } 14110167291SKenneth E. Jansen */ 14210167291SKenneth E. Jansen } 14310167291SKenneth E. Jansen // 14410167291SKenneth E. Jansen // 14510167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<****************** 14610167291SKenneth E. Jansen // 14710167291SKenneth E. Jansen // 14810167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations 14910167291SKenneth E. Jansen // 15010167291SKenneth E. Jansen // 15110167291SKenneth E. Jansen genpar.idflx = 0 ; 15210167291SKenneth E. Jansen if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 15310167291SKenneth E. Jansen if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 15410167291SKenneth E. Jansen 15510167291SKenneth E. Jansen 15610167291SKenneth E. Jansen 15710167291SKenneth E. Jansen PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 15810167291SKenneth E. Jansen PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 15910167291SKenneth E. Jansen gcorp_t glbNZ; 16010167291SKenneth E. Jansen 16110167291SKenneth E. Jansen if(firstpetsccall == 1) { 16210167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 16310167291SKenneth E. Jansen idiagnz[i]=0; 16410167291SKenneth E. Jansen iodiagnz[i]=0; 16510167291SKenneth E. Jansen } 16610167291SKenneth E. Jansen i=0; 16710167291SKenneth E. Jansen for(k=0;k<nshg;k++) { 16810167291SKenneth E. Jansen if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 16910167291SKenneth E. Jansen // this node is not owned by this rank so we skip 17010167291SKenneth E. Jansen } else { 17110167291SKenneth E. Jansen for(j=col[i]-1;j<col[i+1]-1;j++) { 172*79f1763eSKenneth E. Jansen // assert(row[j]<=nshg); 173*79f1763eSKenneth E. Jansen // assert(fncorp[row[j]-1]<=nshgt); 174*79f1763eSKenneth E. Jansen glbNZ=fncorp[row[j]-1]; 17510167291SKenneth E. Jansen if((glbNZ < mbeg) || (glbNZ > mend)) { 17610167291SKenneth E. Jansen iodiagnz[i]++; 17710167291SKenneth E. Jansen } else { 17810167291SKenneth E. Jansen idiagnz[i]++; 17910167291SKenneth E. Jansen } 18010167291SKenneth E. Jansen } 18110167291SKenneth E. Jansen i++; 18210167291SKenneth E. Jansen } 18310167291SKenneth E. Jansen } 18410167291SKenneth E. Jansen gcorp_t mind=1000; 18510167291SKenneth E. Jansen gcorp_t mino=1000; 18610167291SKenneth E. Jansen gcorp_t maxd=0; 18710167291SKenneth E. Jansen gcorp_t maxo=0; 18810167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 18910167291SKenneth E. Jansen mind=min(mind,idiagnz[i]); 19010167291SKenneth E. Jansen mino=min(mino,iodiagnz[i]); 19110167291SKenneth E. Jansen maxd=max(maxd,idiagnz[i]); 19210167291SKenneth E. Jansen maxo=max(maxo,iodiagnz[i]); 19310167291SKenneth E. Jansen // iodiagnz[i]=max(iodiagnz[i],10); 19410167291SKenneth E. Jansen // idiagnz[i]=max(idiagnz[i],10); 19510167291SKenneth E. Jansen // iodiagnz[i]=2*iodiagnz[i]; // estimate a bit higher for off-part interactions 19610167291SKenneth E. Jansen // idiagnz[i]=2*idiagnz[i]; // estimate a bit higher for off-part interactions 19710167291SKenneth E. Jansen } 19810167291SKenneth E. Jansen // the above was pretty good but below is faster and not too much more memory...of course once you do this 19910167291SKenneth E. Jansen // could just use the constant fill parameters in create but keep it alive for potential later optimization 20010167291SKenneth E. Jansen 20110167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 20210167291SKenneth E. Jansen iodiagnz[i]=1.3*maxd; 20310167291SKenneth E. Jansen idiagnz[i]=1.3*maxd; 20410167291SKenneth E. Jansen } 20510167291SKenneth E. Jansen 20610167291SKenneth E. Jansen 20710167291SKenneth E. Jansen 20810167291SKenneth E. Jansen if(workfc.numpe < 200){ 20910167291SKenneth E. Jansen printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 21010167291SKenneth E. Jansen printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo); 21110167291SKenneth E. Jansen } 21210167291SKenneth E. Jansen // Print debug info 21310167291SKenneth E. Jansen if(nshgt < 200){ 21410167291SKenneth E. Jansen int irank; 21510167291SKenneth E. Jansen for(irank=0;irank<workfc.numpe;irank++) { 21610167291SKenneth E. Jansen if(irank == workfc.myrank){ 21710167291SKenneth E. Jansen printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank); 21810167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 21910167291SKenneth E. Jansen printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]); 22010167291SKenneth E. Jansen } 22110167291SKenneth E. Jansen } 22210167291SKenneth E. Jansen MPI_Barrier(MPI_COMM_WORLD); 22310167291SKenneth E. Jansen } 22410167291SKenneth E. Jansen } 22510167291SKenneth E. Jansen } 22610167291SKenneth E. Jansen if(firstpetsccall == 1) { 22710167291SKenneth E. Jansen 22810167291SKenneth E. Jansen petsc_bs = (PetscInt) nflow; 22910167291SKenneth E. Jansen petsc_m = (PetscInt) nflow* (PetscInt) iownnodes; 23010167291SKenneth E. Jansen petsc_M = (PetscInt) nshgt * (PetscInt) nflow; 23110167291SKenneth E. Jansen petsc_PA = (PetscInt) 40; 23210167291SKenneth E. Jansen // TODO: fixe nshgt for 64 bit integer!!!!! 23310167291SKenneth E. Jansen 23410167291SKenneth E. Jansen //ierr = MatCreateBAIJ(PETSC_COMM_WORLD, nflow, nflow*iownnodes, 23510167291SKenneth E. Jansen // nflow*iownnodes, nshgt*nflow, nshgt*nflow, 0, 23610167291SKenneth E. Jansen 23710167291SKenneth E. Jansen // this has no pre-allocate ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 23810167291SKenneth E. Jansen // this has no pre-allocate 0, NULL, 0, NULL, &lhsP); 23910167291SKenneth E. Jansen 24010167291SKenneth E. Jansen // next 2 do pre-allocate 24110167291SKenneth E. Jansen 24210167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 24310167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before matCreateBAIJ petsc_bs,petsc_m,petsc_M,petsc_PA %ld %ld %ld %ld \n",petsc_bs,petsc_m,petsc_M,petsc_PA); 24410167291SKenneth E. Jansen 24510167291SKenneth E. Jansen ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 24610167291SKenneth E. Jansen 0, idiagnz, 0, iodiagnz, &lhsP); 24710167291SKenneth E. Jansen 24810167291SKenneth E. Jansen // petsc_PA, NULL, petsc_PA, NULL, &lhsP); 24910167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 25010167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before matSetOoption 1 \n"); 25110167291SKenneth E. Jansen ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 25210167291SKenneth E. Jansen 25310167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 254dcce770dSKenneth E. Jansen #ifdef JEDBROWN 25510167291SKenneth E. Jansen ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 256dcce770dSKenneth E. Jansen #endif 25710167291SKenneth E. Jansen ierr = MatSetUp(lhsP); 25810167291SKenneth E. Jansen 25910167291SKenneth E. Jansen PetscInt myMatStart, myMatEnd; 26010167291SKenneth E. Jansen ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 26110167291SKenneth E. Jansen } 26210167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 26310167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 26410167291SKenneth E. Jansen if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP); 26510167291SKenneth E. Jansen 26610167291SKenneth E. Jansen get_time(duration, (duration+1)); 26710167291SKenneth E. Jansen 26810167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 26910167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before elmgmr \n"); 27010167291SKenneth E. Jansen // for (i=0; i<nshg ; i++) { 27110167291SKenneth E. Jansen // assert(fncorp[i]>0); 27210167291SKenneth E. Jansen // } 27310167291SKenneth E. Jansen 27410167291SKenneth E. Jansen ElmGMRPETSc(y, ac, x, 27510167291SKenneth E. Jansen shp, shgl, iBC, 27610167291SKenneth E. Jansen BC, shpb, 27710167291SKenneth E. Jansen shglb, res, 27810167291SKenneth E. Jansen rmes, iper, 27910167291SKenneth E. Jansen ilwork, rerr , &lhsP); 28010167291SKenneth E. Jansen get_time((duration+2), (duration+3)); 28110167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 28210167291SKenneth E. Jansen (duration+1), (duration+3), 28310167291SKenneth E. Jansen "ElmGMRPETSc \0"); // char(0)) 28410167291SKenneth E. Jansen // printf("after ElmGMRPETSc\n"); 28510167291SKenneth E. Jansen if(firstpetsccall == 1) { 28610167291SKenneth E. Jansen // Setup IndexSet. For now, we mimic vector insertion procedure 28710167291SKenneth E. Jansen // Since we always reference by global indexes this doesn't matter 28810167291SKenneth E. Jansen // except for cache performance) 28910167291SKenneth E. Jansen // TODO: Better arrangment? 29010167291SKenneth E. Jansen nodetoinsert = 0; 29110167291SKenneth E. Jansen k=0; 29210167291SKenneth E. Jansen if(workfc.numpe > 1) { 29310167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 29410167291SKenneth E. Jansen nodetoinsert = fncorp[i]-1; 29510167291SKenneth E. Jansen // if(nodetoinsert<0) printf("nodetoinsert <0! myrank: %d, value: %ld\n", workfc.myrank, nodetoinsert); 29610167291SKenneth E. Jansen // assert(fncorp[i]>=0); 29710167291SKenneth E. Jansen for (j=1; j<=nflow; j++) { 29810167291SKenneth E. Jansen indexsetary[k] = nodetoinsert*nflow+(j-1); 29910167291SKenneth E. Jansen assert(indexsetary[k]>=0); 30010167291SKenneth E. Jansen // assert(fncorp[i]>=0); 30110167291SKenneth E. Jansen k = k+1; 30210167291SKenneth E. Jansen } 30310167291SKenneth E. Jansen } 30410167291SKenneth E. Jansen } 30510167291SKenneth E. Jansen else { 30610167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 30710167291SKenneth E. Jansen nodetoinsert = i; 30810167291SKenneth E. Jansen for (j=1; j<=nflow; j++) { 30910167291SKenneth E. Jansen indexsetary[k] = nodetoinsert*nflow+(j-1); 31010167291SKenneth E. Jansen k = k+1; 31110167291SKenneth E. Jansen } 31210167291SKenneth E. Jansen } 31310167291SKenneth E. Jansen } 31410167291SKenneth E. Jansen 31510167291SKenneth E. Jansen for (i=0; i<nshg*nflow; i++) { 31610167291SKenneth E. Jansen gblindexsetary[i] = i ; //TODO: better option for performance? 31710167291SKenneth E. Jansen } 31810167291SKenneth E. Jansen // Create Vector Index Maps 31910167291SKenneth E. Jansen petsc_n = (PetscInt) nshg * (PetscInt) nflow; 32010167291SKenneth E. Jansen //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, indexsetary, 32110167291SKenneth E. Jansen // PETSC_COPY_VALUES, &LocalIndexSet); 32210167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary, 32310167291SKenneth E. Jansen PETSC_COPY_VALUES, &LocalIndexSet); 32410167291SKenneth E. Jansen // printf("after 1st ISCreateGeneral %d\n", ierr); 32510167291SKenneth E. Jansen //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, gblindexsetary, 32610167291SKenneth E. Jansen // PETSC_COPY_VALUES, &GlobalIndexSet); 32710167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetary, 32810167291SKenneth E. Jansen PETSC_COPY_VALUES, &GlobalIndexSet); 32910167291SKenneth E. Jansen // printf("after 2nd ISCreateGeneral %d\n", ierr); 33010167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSet,&VectorMapping); 33110167291SKenneth E. Jansen // printf("after 1st ISLocalToGlobalMappingCreateIS %d\n",ierr); 33210167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSet,&GblVectorMapping); 33310167291SKenneth E. Jansen // printf("after 2nd ISLocalToGlobalMappingCreateIS %d\n",ierr); 33410167291SKenneth E. Jansen } 33510167291SKenneth E. Jansen if(genpar.lhs == 1) { 33610167291SKenneth E. Jansen get_time((duration), (duration+1)); 33710167291SKenneth E. Jansen ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY); 33810167291SKenneth E. Jansen ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY); 33910167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 34010167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 34110167291SKenneth E. Jansen (duration+1), (duration+3), 34210167291SKenneth E. Jansen "MatAssembly \0"); // char(0)) 34310167291SKenneth E. Jansen get_time(duration, (duration+1)); 34410167291SKenneth E. Jansen } 34510167291SKenneth E. Jansen if(firstpetsccall == 1) { 34610167291SKenneth E. Jansen ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol); 34710167291SKenneth E. Jansen // TODO Should this be LocalRow? LocalCol? or does LocalRow always == LocalCol 34810167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resP); 34910167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP); 35010167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &DyP); 35110167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP); 35210167291SKenneth E. Jansen } 35310167291SKenneth E. Jansen ierr = VecZeroEntries(resP); 35410167291SKenneth E. Jansen ierr = VecZeroEntries(DyP); 35510167291SKenneth E. Jansen if(firstpetsccall == 1) { 35610167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resPGlobal); 35710167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobal); 35810167291SKenneth E. Jansen //ierr = VecCreateSeq(PETSC_COMM_SELF, nshg*nflow, &DyPLocal); 35910167291SKenneth E. Jansen ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal); 36010167291SKenneth E. Jansen } 36110167291SKenneth E. Jansen ierr = VecZeroEntries(resPGlobal); 36210167291SKenneth E. Jansen // call VecZeroEntries(DyPLocal, ierr) 36310167291SKenneth E. Jansen 36410167291SKenneth E. Jansen PetscRow=0; 36510167291SKenneth E. Jansen k = 0; 36610167291SKenneth E. Jansen int index; 36710167291SKenneth E. Jansen for (i=0; i<nshg; i++ ){ 36810167291SKenneth E. Jansen for (j = 1; j<=nflow; j++){ 36910167291SKenneth E. Jansen index = i + (j-1)*nshg; 37010167291SKenneth E. Jansen valtoinsert = res[index]; 37110167291SKenneth E. Jansen if(workfc.numpe > 1) { 37210167291SKenneth E. Jansen PetscRow = (fncorp[i]-1)*nflow+(j-1); 37310167291SKenneth E. Jansen } 37410167291SKenneth E. Jansen else { 37510167291SKenneth E. Jansen PetscRow = i*nflow+(j-1); 37610167291SKenneth E. Jansen } 37710167291SKenneth E. Jansen assert(fncorp[i]<=nshgt); 37810167291SKenneth E. Jansen assert(fncorp[i]>0); 37910167291SKenneth E. Jansen assert(PetscRow>=0); 38010167291SKenneth E. Jansen assert(PetscRow<=nshgt*nflow); 38110167291SKenneth E. Jansen ierr = VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES); 38210167291SKenneth E. Jansen } 38310167291SKenneth E. Jansen } 38410167291SKenneth E. Jansen // printf("after VecSetValue loop %d\n",ierr); 38510167291SKenneth E. Jansen ierr = VecAssemblyBegin(resP); 38610167291SKenneth E. Jansen ierr = VecAssemblyEnd(resP); 38710167291SKenneth E. Jansen get_time((duration+2), (duration+3)); 38810167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 38910167291SKenneth E. Jansen (duration+1), (duration+3), 39010167291SKenneth E. Jansen "VectorWorkPre \0"); // char(0)) 39110167291SKenneth E. Jansen 39210167291SKenneth E. Jansen get_time((duration),(duration+1)); 39310167291SKenneth E. Jansen 39410167291SKenneth E. Jansen if(firstpetsccall == 1) { 39510167291SKenneth E. Jansen ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); 39610167291SKenneth E. Jansen ierr = KSPSetOperators(ksp, lhsP, lhsP); 39710167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksp, lhsP, lhsP, SAME_NONZERO_PATTERN); 39810167291SKenneth E. Jansen ierr = KSPGetPC(ksp, &pc); 39910167291SKenneth E. Jansen ierr = PCSetType(pc, PCPBJACOBI); 40010167291SKenneth E. Jansen PetscInt maxits; 40110167291SKenneth E. Jansen maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 40210167291SKenneth E. Jansen //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 40310167291SKenneth E. Jansen ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 40410167291SKenneth E. Jansen ierr = KSPSetFromOptions(ksp); 40510167291SKenneth E. Jansen } 40610167291SKenneth E. Jansen ierr = KSPSolve(ksp, resP, DyP); 40710167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 40810167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 40910167291SKenneth E. Jansen (duration+1), (duration+3), 41010167291SKenneth E. Jansen "KSPSolve \0"); // char(0)) 41110167291SKenneth E. Jansen //if(myrank .eq. 0) then 41210167291SKenneth E. Jansen //!write(*,"(A, E10.3)") "KSPSolve ", elapsed 41310167291SKenneth E. Jansen //end if 41410167291SKenneth E. Jansen get_time((duration),(duration+1)); 41510167291SKenneth E. Jansen if(firstpetsccall == 1) { 41610167291SKenneth E. Jansen ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, GlobalIndexSet, &scatter7); 41710167291SKenneth E. Jansen } 41810167291SKenneth E. Jansen ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 41910167291SKenneth E. Jansen ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 42010167291SKenneth E. Jansen ierr = VecGetArray(DyPLocal, &parray); 42110167291SKenneth E. Jansen PetscRow = 0; 42210167291SKenneth E. Jansen for ( node = 0; node< nshg; node++) { 42310167291SKenneth E. Jansen for (var = 1; var<= nflow; var++) { 42410167291SKenneth E. Jansen index = node + (var-1)*nshg; 42510167291SKenneth E. Jansen Dy[index] = parray[PetscRow]; 42610167291SKenneth E. Jansen PetscRow = PetscRow+1; 42710167291SKenneth E. Jansen } 42810167291SKenneth E. Jansen } 42910167291SKenneth E. Jansen ierr = VecRestoreArray(DyPLocal, &parray); 43010167291SKenneth E. Jansen 43110167291SKenneth E. Jansen firstpetsccall = 0; 43210167291SKenneth E. Jansen // later timer ('Solver '); 43310167291SKenneth E. Jansen 43410167291SKenneth E. Jansen // .... output the statistics 43510167291SKenneth E. Jansen // 43610167291SKenneth E. Jansen itrpar.iKs=0; // see rstat() 43710167291SKenneth E. Jansen PetscInt its; 43810167291SKenneth E. Jansen //ierr = KSPGetIterationNumber(ksp, &itrpar.iKs); 43910167291SKenneth E. Jansen ierr = KSPGetIterationNumber(ksp, &its); 44010167291SKenneth E. Jansen itrpar.iKs = (int) its; 44110167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 44210167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 44310167291SKenneth E. Jansen (duration+1), (duration+3), 44410167291SKenneth E. Jansen "solWork \0"); // char(0)) 44510167291SKenneth E. Jansen itrpar.ntotGM += (int) its; 44610167291SKenneth E. Jansen rstat (res, ilwork); 44710167291SKenneth E. Jansen // 44810167291SKenneth E. Jansen // .... stop the timer 44910167291SKenneth E. Jansen // 45010167291SKenneth E. Jansen // 3002 continue ! no solve just res. 45110167291SKenneth E. Jansen // later call timer ('Back ') 45210167291SKenneth E. Jansen // 45310167291SKenneth E. Jansen // .... end 45410167291SKenneth E. Jansen // 45510167291SKenneth E. Jansen } 45610167291SKenneth E. Jansen 45710167291SKenneth E. Jansen void SolGMRpSclr(double* y, double* ac, 45810167291SKenneth E. Jansen double* x, double* elDw, int* iBC, double* BC, 45910167291SKenneth E. Jansen int* col, int* row, 46010167291SKenneth E. Jansen int* iper, 46110167291SKenneth E. Jansen int* ilwork, double* shp, double* shgl, double* shpb, 46210167291SKenneth E. Jansen double* shglb, double* res, double* Dy, long long int* fncorp) 46310167291SKenneth E. Jansen { 46410167291SKenneth E. Jansen // 46510167291SKenneth E. Jansen // ---------------------------------------------------------------------- 46610167291SKenneth E. Jansen // 46710167291SKenneth E. Jansen // This is the preconditioned GMRES driver routine. 46810167291SKenneth E. Jansen // 46910167291SKenneth E. Jansen // input: 47010167291SKenneth E. Jansen // y (nshg,ndof) : Y-variables at n+alpha_v 47110167291SKenneth E. Jansen // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 47210167291SKenneth E. Jansen // yold (nshg,ndof) : Y-variables at beginning of step 47310167291SKenneth E. Jansen // acold (nshg,ndof) : Primvar. accel. variable at begng step 47410167291SKenneth E. Jansen // x (numnp,nsd) : node coordinates 47510167291SKenneth E. Jansen // iBC (nshg) : BC codes 47610167291SKenneth E. Jansen // BC (nshg,ndofBC) : BC constraint parameters 47710167291SKenneth E. Jansen // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 47810167291SKenneth E. Jansen // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 47910167291SKenneth E. Jansen // 48010167291SKenneth E. Jansen // ---------------------------------------------------------------------- 48110167291SKenneth E. Jansen // 48210167291SKenneth E. Jansen 48310167291SKenneth E. Jansen 48410167291SKenneth E. Jansen // Get variables from common_c.h 48510167291SKenneth E. Jansen int nshg, nflow, nsd, iownnodes; 48610167291SKenneth E. Jansen nshg = conpar.nshg; 48710167291SKenneth E. Jansen nsd = NSD; 48810167291SKenneth E. Jansen iownnodes = conpar.iownnodes; 48910167291SKenneth E. Jansen 49010167291SKenneth E. Jansen gcorp_t nshgt; 49110167291SKenneth E. Jansen gcorp_t mbeg; 49210167291SKenneth E. Jansen gcorp_t mend; 49310167291SKenneth E. Jansen nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 49410167291SKenneth E. Jansen mbeg = (gcorp_t) newdim.minowned; 49510167291SKenneth E. Jansen mend = (gcorp_t) newdim.maxowned; 49610167291SKenneth E. Jansen 49710167291SKenneth E. Jansen 49810167291SKenneth E. Jansen int node, element, var, eqn; 49910167291SKenneth E. Jansen double valtoinsert; 50010167291SKenneth E. Jansen int nenl, iel, lelCat, lcsyst, iorder, nshl; 50110167291SKenneth E. Jansen int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 50210167291SKenneth E. Jansen double DyFlats[nshg]; 50310167291SKenneth E. Jansen double DyFlatPhastas[nshg]; 50410167291SKenneth E. Jansen double rmes[nshg]; 50510167291SKenneth E. Jansen // DEBUG 50610167291SKenneth E. Jansen int i,j,k,l,m; 50710167291SKenneth E. Jansen PetscInt indexsetarys[nshg]; 50810167291SKenneth E. Jansen PetscInt gblindexsetarys[nshg]; 50910167291SKenneth E. Jansen PetscInt nodetoinsert; 51010167291SKenneth E. Jansen 51110167291SKenneth E. Jansen double real_rtol, real_abstol, real_dtol; 51210167291SKenneth E. Jansen double *parray; 51310167291SKenneth E. Jansen PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 51410167291SKenneth E. Jansen PetscInt petsc_n; 51510167291SKenneth E. Jansen PetscOne = 1; 51610167291SKenneth E. Jansen uint64_t duration[4]; 51710167291SKenneth E. Jansen 51810167291SKenneth E. Jansen 51910167291SKenneth E. Jansen // 52010167291SKenneth E. Jansen // 52110167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<****************** 52210167291SKenneth E. Jansen // 52310167291SKenneth E. Jansen // 52410167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations 52510167291SKenneth E. Jansen // 52610167291SKenneth E. Jansen // 52710167291SKenneth E. Jansen genpar.idflx = 0 ; 52810167291SKenneth E. Jansen if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 52910167291SKenneth E. Jansen if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 53010167291SKenneth E. Jansen 53110167291SKenneth E. Jansen 53210167291SKenneth E. Jansen 53310167291SKenneth E. Jansen PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 53410167291SKenneth E. Jansen PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 53510167291SKenneth E. Jansen gcorp_t glbNZ; 53610167291SKenneth E. Jansen 53710167291SKenneth E. Jansen if(firstpetsccalls == 1) { 53810167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 53910167291SKenneth E. Jansen idiagnz[i]=0; 54010167291SKenneth E. Jansen iodiagnz[i]=0; 54110167291SKenneth E. Jansen } 54210167291SKenneth E. Jansen i=0; 54310167291SKenneth E. Jansen for(k=0;k<nshg;k++) { 54410167291SKenneth E. Jansen if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 54510167291SKenneth E. Jansen // this node is not owned by this rank so we skip 54610167291SKenneth E. Jansen } else { 54710167291SKenneth E. Jansen for(j=col[i]-1;j<col[i+1]-1;j++) { 54810167291SKenneth E. Jansen glbNZ=fncorp[row[j]]; 54910167291SKenneth E. Jansen if((glbNZ < mbeg) || (glbNZ > mend)) { 55010167291SKenneth E. Jansen iodiagnz[i]++; 55110167291SKenneth E. Jansen } else { 55210167291SKenneth E. Jansen idiagnz[i]++; 55310167291SKenneth E. Jansen } 55410167291SKenneth E. Jansen } 55510167291SKenneth E. Jansen i++; 55610167291SKenneth E. Jansen } 55710167291SKenneth E. Jansen } 55810167291SKenneth E. Jansen gcorp_t mind=1000; 55910167291SKenneth E. Jansen gcorp_t mino=1000; 56010167291SKenneth E. Jansen gcorp_t maxd=0; 56110167291SKenneth E. Jansen gcorp_t maxo=0; 56210167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 56310167291SKenneth E. Jansen mind=min(mind,idiagnz[i]); 56410167291SKenneth E. Jansen mino=min(mino,iodiagnz[i]); 56510167291SKenneth E. Jansen maxd=max(maxd,idiagnz[i]); 56610167291SKenneth E. Jansen maxo=max(maxo,iodiagnz[i]); 56710167291SKenneth E. Jansen } 56810167291SKenneth E. Jansen 56910167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 57010167291SKenneth E. Jansen iodiagnz[i]=1.3*maxd; 57110167291SKenneth E. Jansen idiagnz[i]=1.3*maxd; 57210167291SKenneth E. Jansen } 57310167291SKenneth E. Jansen 57410167291SKenneth E. Jansen 57510167291SKenneth E. Jansen 57610167291SKenneth E. Jansen } 57710167291SKenneth E. Jansen if(firstpetsccalls == 1) { 57810167291SKenneth E. Jansen 57910167291SKenneth E. Jansen petsc_m = (PetscInt) iownnodes; 58010167291SKenneth E. Jansen petsc_M = (PetscInt) nshgt; 58110167291SKenneth E. Jansen petsc_PA = (PetscInt) 40; 58210167291SKenneth E. Jansen 58310167291SKenneth E. Jansen ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M, 58410167291SKenneth E. Jansen 0, idiagnz, 0, iodiagnz, &lhsPs); 58510167291SKenneth E. Jansen 58610167291SKenneth E. Jansen ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 58710167291SKenneth E. Jansen 58810167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 589dcce770dSKenneth E. Jansen #ifdef JEDBROWN 59010167291SKenneth E. Jansen ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 591dcce770dSKenneth E. Jansen #endif 59210167291SKenneth E. Jansen ierr = MatSetUp(lhsPs); 59310167291SKenneth E. Jansen 59410167291SKenneth E. Jansen PetscInt myMatStart, myMatEnd; 59510167291SKenneth E. Jansen ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 59610167291SKenneth E. Jansen } 59710167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 59810167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 59910167291SKenneth E. Jansen if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs); 60010167291SKenneth E. Jansen 60110167291SKenneth E. Jansen get_time(duration, (duration+1)); 60210167291SKenneth E. Jansen 60310167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 60410167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before elmgmr \n"); 60510167291SKenneth E. Jansen // for (i=0; i<nshg ; i++) { 60610167291SKenneth E. Jansen // assert(fncorp[i]>0); 60710167291SKenneth E. Jansen // } 60810167291SKenneth E. Jansen 60910167291SKenneth E. Jansen ElmGMRPETScSclr(y, ac, x, elDw, 61010167291SKenneth E. Jansen shp, shgl, iBC, 61110167291SKenneth E. Jansen BC, shpb, 61210167291SKenneth E. Jansen shglb, res, 61310167291SKenneth E. Jansen rmes, iper, 61410167291SKenneth E. Jansen ilwork, &lhsPs); 61510167291SKenneth E. Jansen if(firstpetsccalls == 1) { 61610167291SKenneth E. Jansen // Setup IndexSet. For now, we mimic vector insertion procedure 61710167291SKenneth E. Jansen // Since we always reference by global indexes this doesn't matter 61810167291SKenneth E. Jansen // except for cache performance) 61910167291SKenneth E. Jansen // TODO: Better arrangment? 62010167291SKenneth E. Jansen nodetoinsert = 0; 62110167291SKenneth E. Jansen k=0; 62210167291SKenneth E. Jansen if(workfc.numpe > 1) { 62310167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 62410167291SKenneth E. Jansen nodetoinsert = fncorp[i]-1; 62510167291SKenneth E. Jansen indexsetarys[k] = nodetoinsert; 62610167291SKenneth E. Jansen k = k+1; 62710167291SKenneth E. Jansen } 62810167291SKenneth E. Jansen } 62910167291SKenneth E. Jansen else { 63010167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 63110167291SKenneth E. Jansen nodetoinsert = i; 63210167291SKenneth E. Jansen indexsetarys[k] = nodetoinsert; 63310167291SKenneth E. Jansen k = k+1; 63410167291SKenneth E. Jansen } 63510167291SKenneth E. Jansen } 63610167291SKenneth E. Jansen 63710167291SKenneth E. Jansen for (i=0; i<nshg; i++) { 63810167291SKenneth E. Jansen gblindexsetarys[i] = i ; //TODO: better option for performance? 63910167291SKenneth E. Jansen } 64010167291SKenneth E. Jansen // Create Vector Index Maps 64110167291SKenneth E. Jansen petsc_n = (PetscInt) nshg; 64210167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys, 64310167291SKenneth E. Jansen PETSC_COPY_VALUES, &LocalIndexSets); 64410167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetarys, 64510167291SKenneth E. Jansen PETSC_COPY_VALUES, &GlobalIndexSets); 64610167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSets,&VectorMappings); 64710167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSets,&GblVectorMappings); 64810167291SKenneth E. Jansen } 64910167291SKenneth E. Jansen if(genpar.lhs ==1) { 65010167291SKenneth E. Jansen ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY); 65110167291SKenneth E. Jansen ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY); 65210167291SKenneth E. Jansen } 65310167291SKenneth E. Jansen if(firstpetsccalls == 1) { 65410167291SKenneth E. Jansen ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol); 65510167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs); 65610167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs); 65710167291SKenneth E. Jansen } 65810167291SKenneth E. Jansen ierr = VecZeroEntries(resPs); 65910167291SKenneth E. Jansen ierr = VecZeroEntries(DyPs); 66010167291SKenneth E. Jansen if(firstpetsccalls == 1) { 66110167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobals); 66210167291SKenneth E. Jansen ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals); 66310167291SKenneth E. Jansen } 66410167291SKenneth E. Jansen ierr = VecZeroEntries(resPGlobals); 66510167291SKenneth E. Jansen 66610167291SKenneth E. Jansen PetscRow=0; 66710167291SKenneth E. Jansen k = 0; 66810167291SKenneth E. Jansen int index; 66910167291SKenneth E. Jansen for (i=0; i<nshg; i++ ){ 67010167291SKenneth E. Jansen valtoinsert = res[i]; 67110167291SKenneth E. Jansen if(workfc.numpe > 1) { 67210167291SKenneth E. Jansen PetscRow = (fncorp[i]-1); 67310167291SKenneth E. Jansen } 67410167291SKenneth E. Jansen else { 67510167291SKenneth E. Jansen PetscRow = i-1; 67610167291SKenneth E. Jansen } 67710167291SKenneth E. Jansen assert(fncorp[i]<=nshgt); 67810167291SKenneth E. Jansen assert(fncorp[i]>0); 67910167291SKenneth E. Jansen assert(PetscRow>=0); 68010167291SKenneth E. Jansen assert(PetscRow<=nshgt); 68110167291SKenneth E. Jansen ierr = VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES); 68210167291SKenneth E. Jansen } 68310167291SKenneth E. Jansen // printf("after VecSetValue loop %d\n",ierr); 68410167291SKenneth E. Jansen ierr = VecAssemblyBegin(resPs); 68510167291SKenneth E. Jansen ierr = VecAssemblyEnd(resPs); 68610167291SKenneth E. Jansen 68710167291SKenneth E. Jansen if(firstpetsccalls == 1) { 68810167291SKenneth E. Jansen ierr = KSPCreate(PETSC_COMM_WORLD, &ksps); 68910167291SKenneth E. Jansen ierr = KSPSetOperators(ksps, lhsPs, lhsPs); 69010167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN); 69110167291SKenneth E. Jansen ierr = KSPGetPC(ksps, &pcs); 69210167291SKenneth E. Jansen ierr = PCSetType(pcs, PCPBJACOBI); 69310167291SKenneth E. Jansen PetscInt maxits; 69410167291SKenneth E. Jansen maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 69510167291SKenneth E. Jansen //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 69610167291SKenneth E. Jansen ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 69710167291SKenneth E. Jansen ierr = KSPSetFromOptions(ksps); 69810167291SKenneth E. Jansen } 69910167291SKenneth E. Jansen ierr = KSPSolve(ksps, resPs, DyPs); 70010167291SKenneth E. Jansen if(firstpetsccalls == 1) { 70110167291SKenneth E. Jansen ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, GlobalIndexSets, &scatter7s); 70210167291SKenneth E. Jansen } 70310167291SKenneth E. Jansen ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 70410167291SKenneth E. Jansen ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 70510167291SKenneth E. Jansen ierr = VecGetArray(DyPLocals, &parray); 70610167291SKenneth E. Jansen PetscRow = 0; 70710167291SKenneth E. Jansen for ( node = 0; node< nshg; node++) { 70810167291SKenneth E. Jansen index = node; 70910167291SKenneth E. Jansen Dy[index] = parray[PetscRow]; 71010167291SKenneth E. Jansen PetscRow = PetscRow+1; 71110167291SKenneth E. Jansen } 71210167291SKenneth E. Jansen ierr = VecRestoreArray(DyPLocals, &parray); 71310167291SKenneth E. Jansen 71410167291SKenneth E. Jansen firstpetsccalls = 0; 71510167291SKenneth E. Jansen 71610167291SKenneth E. Jansen // .... output the statistics 71710167291SKenneth E. Jansen // 71810167291SKenneth E. Jansen // itrpar.iKss=0; // see rstat() 71910167291SKenneth E. Jansen PetscInt its; 72010167291SKenneth E. Jansen ierr = KSPGetIterationNumber(ksps, &its); 72110167291SKenneth E. Jansen itrpar.iKss = (int) its; 72210167291SKenneth E. Jansen itrpar.ntotGMs += (int) its; 72310167291SKenneth E. Jansen rstatSclr (res, ilwork); 72410167291SKenneth E. Jansen // 72510167291SKenneth E. Jansen // .... end 72610167291SKenneth E. Jansen // 72710167291SKenneth E. Jansen } 72817860365SKenneth E. Jansen #endif 729