117860365SKenneth E. Jansen #ifdef HAVE_PETSC 210167291SKenneth E. Jansen #include <stdio.h> 310167291SKenneth E. Jansen 410167291SKenneth E. Jansen #include "petscsys.h" 510167291SKenneth E. Jansen #include "petscvec.h" 610167291SKenneth E. Jansen #include "petscmat.h" 710167291SKenneth E. Jansen #include "petscpc.h" 810167291SKenneth E. Jansen #include "petscksp.h" 910167291SKenneth E. Jansen 1010167291SKenneth E. Jansen #include "assert.h" 1110167291SKenneth E. Jansen #include "common_c.h" 1210167291SKenneth E. Jansen 1310167291SKenneth E. Jansen #include <FCMangle.h> 1410167291SKenneth E. Jansen #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP) 1510167291SKenneth E. Jansen #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR) 1610167291SKenneth E. Jansen #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC) 1710167291SKenneth E. Jansen #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR) 1810167291SKenneth E. Jansen #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT) 1910167291SKenneth E. Jansen #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR) 2010167291SKenneth E. Jansen #define min(a,b) (((a) < (b)) ? (a) : (b)) 2110167291SKenneth E. Jansen #define max(a,b) (((a) > (b)) ? (a) : (b)) 2210167291SKenneth E. Jansen #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME) 2310167291SKenneth E. Jansen #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF) 2410167291SKenneth E. Jansen 2510167291SKenneth E. Jansen typedef long long int gcorp_t; 2610167291SKenneth E. Jansen 2710167291SKenneth E. Jansen void get_time(uint64_t* rv, uint64_t* cycle); 2810167291SKenneth E. Jansen void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl); 2910167291SKenneth E. Jansen 3010167291SKenneth E. Jansen //#include "auxmpi.h" 3110167291SKenneth E. Jansen // static PetscOffset poff; 3210167291SKenneth E. Jansen 3310167291SKenneth E. Jansen static Mat lhsP; 3410167291SKenneth E. Jansen static PC pc; 3510167291SKenneth E. Jansen static KSP ksp; 3610167291SKenneth E. Jansen static Vec DyP, resP, DyPLocal, resPGlobal; 3710167291SKenneth E. Jansen static PetscErrorCode ierr; 3810167291SKenneth E. Jansen static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol; 3910167291SKenneth E. Jansen static IS LocalIndexSet, GlobalIndexSet; 4010167291SKenneth E. Jansen static ISLocalToGlobalMapping VectorMapping; 4110167291SKenneth E. Jansen static ISLocalToGlobalMapping GblVectorMapping; 4210167291SKenneth E. Jansen static VecScatter scatter7; 4310167291SKenneth E. Jansen static int firstpetsccall = 1; 4410167291SKenneth E. Jansen 4510167291SKenneth E. Jansen static Mat lhsPs; 4610167291SKenneth E. Jansen static PC pcs; 4710167291SKenneth E. Jansen static KSP ksps; 4810167291SKenneth E. Jansen static Vec DyPs, resPs, DyPLocals, resPGlobals; 4910167291SKenneth E. Jansen static IS LocalIndexSets, GlobalIndexSets; 5010167291SKenneth E. Jansen static ISLocalToGlobalMapping VectorMappings; 5110167291SKenneth E. Jansen static ISLocalToGlobalMapping GblVectorMappings; 5210167291SKenneth E. Jansen static VecScatter scatter7s; 5310167291SKenneth E. Jansen static int firstpetsccalls = 1; 5410167291SKenneth E. Jansen 5510167291SKenneth E. Jansen void SolGMRp(double* y, double* ac, double* yold, 5610167291SKenneth E. Jansen double* x, int* iBC, double* BC, 5710167291SKenneth E. Jansen int* col, int* row, double* lhsk, 5810167291SKenneth E. Jansen double* res, double* BDiag, 5910167291SKenneth E. Jansen int* iper, 6010167291SKenneth E. Jansen int* ilwork, double* shp, double* shgl, double* shpb, 6110167291SKenneth E. Jansen double* shglb, double* Dy, double* rerr, long long int* fncorp) 6210167291SKenneth E. Jansen { 6310167291SKenneth E. Jansen // 6410167291SKenneth E. Jansen // ---------------------------------------------------------------------- 6510167291SKenneth E. Jansen // 6610167291SKenneth E. Jansen // This is the preconditioned GMRES driver routine. 6710167291SKenneth E. Jansen // 6810167291SKenneth E. Jansen // input: 6910167291SKenneth E. Jansen // y (nshg,ndof) : Y-variables at n+alpha_v 7010167291SKenneth E. Jansen // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 7110167291SKenneth E. Jansen // yold (nshg,ndof) : Y-variables at beginning of step 7210167291SKenneth E. Jansen // acold (nshg,ndof) : Primvar. accel. variable at begng step 7310167291SKenneth E. Jansen // x (numnp,nsd) : node coordinates 7410167291SKenneth E. Jansen // iBC (nshg) : BC codes 7510167291SKenneth E. Jansen // BC (nshg,ndofBC) : BC constraint parameters 7610167291SKenneth E. Jansen // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 7710167291SKenneth E. Jansen // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 7810167291SKenneth E. Jansen // 7910167291SKenneth E. Jansen // output: 8010167291SKenneth E. Jansen // res (nshg,nflow) : preconditioned residual 8110167291SKenneth E. Jansen // BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 8210167291SKenneth E. Jansen // 8310167291SKenneth E. Jansen // 8410167291SKenneth E. Jansen // Zdenek Johan, Winter 1991. (Fortran 90) 8510167291SKenneth E. Jansen // ---------------------------------------------------------------------- 8610167291SKenneth E. Jansen // 8710167291SKenneth E. Jansen 8810167291SKenneth E. Jansen 8910167291SKenneth E. Jansen // Get variables from common_c.h 9010167291SKenneth E. Jansen int nshg, nflow, nsd, iownnodes; 9110167291SKenneth E. Jansen nshg = conpar.nshg; 9210167291SKenneth E. Jansen nflow = conpar.nflow; 9310167291SKenneth E. Jansen nsd = NSD; 9410167291SKenneth E. Jansen iownnodes = conpar.iownnodes; 9510167291SKenneth E. Jansen 9610167291SKenneth E. Jansen gcorp_t nshgt; 9710167291SKenneth E. Jansen gcorp_t mbeg; 9810167291SKenneth E. Jansen gcorp_t mend; 9910167291SKenneth E. Jansen nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 10010167291SKenneth E. Jansen mbeg = (gcorp_t) newdim.minowned; 10110167291SKenneth E. Jansen mend = (gcorp_t) newdim.maxowned; 10210167291SKenneth E. Jansen 10310167291SKenneth E. Jansen 10410167291SKenneth E. Jansen int node, element, var, eqn; 10510167291SKenneth E. Jansen double valtoinsert; 10610167291SKenneth E. Jansen int nenl, iel, lelCat, lcsyst, iorder, nshl; 10710167291SKenneth E. Jansen int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 10810167291SKenneth E. Jansen double DyFlat[nshg*nflow]; 10910167291SKenneth E. Jansen double DyFlatPhasta[nshg*nflow]; 11010167291SKenneth E. Jansen double rmes[nshg*nflow]; 11110167291SKenneth E. Jansen // DEBUG 11210167291SKenneth E. Jansen int i,j,k,l,m; 11310167291SKenneth E. Jansen //int indexsetary[nflow*nshg]; 11410167291SKenneth E. Jansen //int gblindexsetary[nflow*nshg]; 11510167291SKenneth E. Jansen PetscInt indexsetary[nflow*nshg]; 11610167291SKenneth E. Jansen PetscInt gblindexsetary[nflow*nshg]; 11710167291SKenneth E. Jansen PetscInt nodetoinsert; 11810167291SKenneth E. Jansen 11910167291SKenneth E. Jansen // FIXME: PetscScalar 12010167291SKenneth E. Jansen double real_rtol, real_abstol, real_dtol; 12110167291SKenneth E. Jansen // /DEBUG 12210167291SKenneth E. Jansen //double parray[1]; // Should be a PetscScalar 12310167291SKenneth E. Jansen double *parray; // Should be a PetscScalar 12410167291SKenneth E. Jansen PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 12510167291SKenneth E. Jansen PetscInt petsc_n; 12610167291SKenneth E. Jansen PetscOne = 1; 12710167291SKenneth E. Jansen uint64_t duration[4]; 12810167291SKenneth E. Jansen 12910167291SKenneth E. Jansen // printf("%g %g %g \n", *rerr, *(rerr+1) , *(rerr+2)); 13010167291SKenneth E. Jansen 13110167291SKenneth E. Jansen if(firstpetsccall == 1) { 13210167291SKenneth E. Jansen /* call get_time(duration(1),duration(2)); 13310167291SKenneth E. Jansen call system('sleep 10'); 13410167291SKenneth E. Jansen call get_time(duration(3),duration(4)); 13510167291SKenneth E. Jansen call get_max_time_diff(duration(1), duration(3), 13610167291SKenneth E. Jansen duration(2), duration(4), 13710167291SKenneth E. Jansen "TenSecondSleep " // char(0)) 13810167291SKenneth E. Jansen if(myrank .eq. 0) { 13910167291SKenneth E. Jansen !write(*,"(A, E10.3)") "TenSecondSleep ", elapsed 14010167291SKenneth E. Jansen } 14110167291SKenneth E. Jansen */ 14210167291SKenneth E. Jansen } 14310167291SKenneth E. Jansen // 14410167291SKenneth E. Jansen // 14510167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<****************** 14610167291SKenneth E. Jansen // 14710167291SKenneth E. Jansen // 14810167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations 14910167291SKenneth E. Jansen // 15010167291SKenneth E. Jansen // 15110167291SKenneth E. Jansen genpar.idflx = 0 ; 15210167291SKenneth E. Jansen if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 15310167291SKenneth E. Jansen if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 15410167291SKenneth E. Jansen 15510167291SKenneth E. Jansen 15610167291SKenneth E. Jansen 15710167291SKenneth E. Jansen PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 15810167291SKenneth E. Jansen PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 15910167291SKenneth E. Jansen gcorp_t glbNZ; 16010167291SKenneth E. Jansen 16110167291SKenneth E. Jansen if(firstpetsccall == 1) { 16210167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 16310167291SKenneth E. Jansen idiagnz[i]=0; 16410167291SKenneth E. Jansen iodiagnz[i]=0; 16510167291SKenneth E. Jansen } 16610167291SKenneth E. Jansen i=0; 16710167291SKenneth E. Jansen for(k=0;k<nshg;k++) { 16810167291SKenneth E. Jansen if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 16910167291SKenneth E. Jansen // this node is not owned by this rank so we skip 17010167291SKenneth E. Jansen } else { 17110167291SKenneth E. Jansen for(j=col[i]-1;j<col[i+1]-1;j++) { 17210167291SKenneth E. Jansen glbNZ=fncorp[row[j]]; 17310167291SKenneth E. Jansen if((glbNZ < mbeg) || (glbNZ > mend)) { 17410167291SKenneth E. Jansen iodiagnz[i]++; 17510167291SKenneth E. Jansen } else { 17610167291SKenneth E. Jansen idiagnz[i]++; 17710167291SKenneth E. Jansen } 17810167291SKenneth E. Jansen } 17910167291SKenneth E. Jansen i++; 18010167291SKenneth E. Jansen } 18110167291SKenneth E. Jansen } 18210167291SKenneth E. Jansen gcorp_t mind=1000; 18310167291SKenneth E. Jansen gcorp_t mino=1000; 18410167291SKenneth E. Jansen gcorp_t maxd=0; 18510167291SKenneth E. Jansen gcorp_t maxo=0; 18610167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 18710167291SKenneth E. Jansen mind=min(mind,idiagnz[i]); 18810167291SKenneth E. Jansen mino=min(mino,iodiagnz[i]); 18910167291SKenneth E. Jansen maxd=max(maxd,idiagnz[i]); 19010167291SKenneth E. Jansen maxo=max(maxo,iodiagnz[i]); 19110167291SKenneth E. Jansen // iodiagnz[i]=max(iodiagnz[i],10); 19210167291SKenneth E. Jansen // idiagnz[i]=max(idiagnz[i],10); 19310167291SKenneth E. Jansen // iodiagnz[i]=2*iodiagnz[i]; // estimate a bit higher for off-part interactions 19410167291SKenneth E. Jansen // idiagnz[i]=2*idiagnz[i]; // estimate a bit higher for off-part interactions 19510167291SKenneth E. Jansen } 19610167291SKenneth E. Jansen // the above was pretty good but below is faster and not too much more memory...of course once you do this 19710167291SKenneth E. Jansen // could just use the constant fill parameters in create but keep it alive for potential later optimization 19810167291SKenneth E. Jansen 19910167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 20010167291SKenneth E. Jansen iodiagnz[i]=1.3*maxd; 20110167291SKenneth E. Jansen idiagnz[i]=1.3*maxd; 20210167291SKenneth E. Jansen } 20310167291SKenneth E. Jansen 20410167291SKenneth E. Jansen 20510167291SKenneth E. Jansen 20610167291SKenneth E. Jansen if(workfc.numpe < 200){ 20710167291SKenneth E. Jansen printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 20810167291SKenneth E. Jansen printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo); 20910167291SKenneth E. Jansen } 21010167291SKenneth E. Jansen // Print debug info 21110167291SKenneth E. Jansen if(nshgt < 200){ 21210167291SKenneth E. Jansen int irank; 21310167291SKenneth E. Jansen for(irank=0;irank<workfc.numpe;irank++) { 21410167291SKenneth E. Jansen if(irank == workfc.myrank){ 21510167291SKenneth E. Jansen printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank); 21610167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 21710167291SKenneth E. Jansen printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]); 21810167291SKenneth E. Jansen } 21910167291SKenneth E. Jansen } 22010167291SKenneth E. Jansen MPI_Barrier(MPI_COMM_WORLD); 22110167291SKenneth E. Jansen } 22210167291SKenneth E. Jansen } 22310167291SKenneth E. Jansen } 22410167291SKenneth E. Jansen if(firstpetsccall == 1) { 22510167291SKenneth E. Jansen 22610167291SKenneth E. Jansen petsc_bs = (PetscInt) nflow; 22710167291SKenneth E. Jansen petsc_m = (PetscInt) nflow* (PetscInt) iownnodes; 22810167291SKenneth E. Jansen petsc_M = (PetscInt) nshgt * (PetscInt) nflow; 22910167291SKenneth E. Jansen petsc_PA = (PetscInt) 40; 23010167291SKenneth E. Jansen // TODO: fixe nshgt for 64 bit integer!!!!! 23110167291SKenneth E. Jansen 23210167291SKenneth E. Jansen //ierr = MatCreateBAIJ(PETSC_COMM_WORLD, nflow, nflow*iownnodes, 23310167291SKenneth E. Jansen // nflow*iownnodes, nshgt*nflow, nshgt*nflow, 0, 23410167291SKenneth E. Jansen 23510167291SKenneth E. Jansen // this has no pre-allocate ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 23610167291SKenneth E. Jansen // this has no pre-allocate 0, NULL, 0, NULL, &lhsP); 23710167291SKenneth E. Jansen 23810167291SKenneth E. Jansen // next 2 do pre-allocate 23910167291SKenneth E. Jansen 24010167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 24110167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before matCreateBAIJ petsc_bs,petsc_m,petsc_M,petsc_PA %ld %ld %ld %ld \n",petsc_bs,petsc_m,petsc_M,petsc_PA); 24210167291SKenneth E. Jansen 24310167291SKenneth E. Jansen ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 24410167291SKenneth E. Jansen 0, idiagnz, 0, iodiagnz, &lhsP); 24510167291SKenneth E. Jansen 24610167291SKenneth E. Jansen // petsc_PA, NULL, petsc_PA, NULL, &lhsP); 24710167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 24810167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before matSetOoption 1 \n"); 24910167291SKenneth E. Jansen ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 25010167291SKenneth E. Jansen 25110167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 252*dcce770dSKenneth E. Jansen #ifdef JEDBROWN 25310167291SKenneth E. Jansen ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 254*dcce770dSKenneth E. Jansen #endif 25510167291SKenneth E. Jansen ierr = MatSetUp(lhsP); 25610167291SKenneth E. Jansen 25710167291SKenneth E. Jansen PetscInt myMatStart, myMatEnd; 25810167291SKenneth E. Jansen ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 25910167291SKenneth E. Jansen } 26010167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 26110167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 26210167291SKenneth E. Jansen if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP); 26310167291SKenneth E. Jansen 26410167291SKenneth E. Jansen get_time(duration, (duration+1)); 26510167291SKenneth E. Jansen 26610167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 26710167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before elmgmr \n"); 26810167291SKenneth E. Jansen // for (i=0; i<nshg ; i++) { 26910167291SKenneth E. Jansen // assert(fncorp[i]>0); 27010167291SKenneth E. Jansen // } 27110167291SKenneth E. Jansen 27210167291SKenneth E. Jansen ElmGMRPETSc(y, ac, x, 27310167291SKenneth E. Jansen shp, shgl, iBC, 27410167291SKenneth E. Jansen BC, shpb, 27510167291SKenneth E. Jansen shglb, res, 27610167291SKenneth E. Jansen rmes, iper, 27710167291SKenneth E. Jansen ilwork, rerr , &lhsP); 27810167291SKenneth E. Jansen get_time((duration+2), (duration+3)); 27910167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 28010167291SKenneth E. Jansen (duration+1), (duration+3), 28110167291SKenneth E. Jansen "ElmGMRPETSc \0"); // char(0)) 28210167291SKenneth E. Jansen // printf("after ElmGMRPETSc\n"); 28310167291SKenneth E. Jansen if(firstpetsccall == 1) { 28410167291SKenneth E. Jansen // Setup IndexSet. For now, we mimic vector insertion procedure 28510167291SKenneth E. Jansen // Since we always reference by global indexes this doesn't matter 28610167291SKenneth E. Jansen // except for cache performance) 28710167291SKenneth E. Jansen // TODO: Better arrangment? 28810167291SKenneth E. Jansen nodetoinsert = 0; 28910167291SKenneth E. Jansen k=0; 29010167291SKenneth E. Jansen if(workfc.numpe > 1) { 29110167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 29210167291SKenneth E. Jansen nodetoinsert = fncorp[i]-1; 29310167291SKenneth E. Jansen // if(nodetoinsert<0) printf("nodetoinsert <0! myrank: %d, value: %ld\n", workfc.myrank, nodetoinsert); 29410167291SKenneth E. Jansen // assert(fncorp[i]>=0); 29510167291SKenneth E. Jansen for (j=1; j<=nflow; j++) { 29610167291SKenneth E. Jansen indexsetary[k] = nodetoinsert*nflow+(j-1); 29710167291SKenneth E. Jansen assert(indexsetary[k]>=0); 29810167291SKenneth E. Jansen // assert(fncorp[i]>=0); 29910167291SKenneth E. Jansen k = k+1; 30010167291SKenneth E. Jansen } 30110167291SKenneth E. Jansen } 30210167291SKenneth E. Jansen } 30310167291SKenneth E. Jansen else { 30410167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 30510167291SKenneth E. Jansen nodetoinsert = i; 30610167291SKenneth E. Jansen for (j=1; j<=nflow; j++) { 30710167291SKenneth E. Jansen indexsetary[k] = nodetoinsert*nflow+(j-1); 30810167291SKenneth E. Jansen k = k+1; 30910167291SKenneth E. Jansen } 31010167291SKenneth E. Jansen } 31110167291SKenneth E. Jansen } 31210167291SKenneth E. Jansen 31310167291SKenneth E. Jansen for (i=0; i<nshg*nflow; i++) { 31410167291SKenneth E. Jansen gblindexsetary[i] = i ; //TODO: better option for performance? 31510167291SKenneth E. Jansen } 31610167291SKenneth E. Jansen // Create Vector Index Maps 31710167291SKenneth E. Jansen petsc_n = (PetscInt) nshg * (PetscInt) nflow; 31810167291SKenneth E. Jansen //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, indexsetary, 31910167291SKenneth E. Jansen // PETSC_COPY_VALUES, &LocalIndexSet); 32010167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary, 32110167291SKenneth E. Jansen PETSC_COPY_VALUES, &LocalIndexSet); 32210167291SKenneth E. Jansen // printf("after 1st ISCreateGeneral %d\n", ierr); 32310167291SKenneth E. Jansen //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, gblindexsetary, 32410167291SKenneth E. Jansen // PETSC_COPY_VALUES, &GlobalIndexSet); 32510167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetary, 32610167291SKenneth E. Jansen PETSC_COPY_VALUES, &GlobalIndexSet); 32710167291SKenneth E. Jansen // printf("after 2nd ISCreateGeneral %d\n", ierr); 32810167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSet,&VectorMapping); 32910167291SKenneth E. Jansen // printf("after 1st ISLocalToGlobalMappingCreateIS %d\n",ierr); 33010167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSet,&GblVectorMapping); 33110167291SKenneth E. Jansen // printf("after 2nd ISLocalToGlobalMappingCreateIS %d\n",ierr); 33210167291SKenneth E. Jansen } 33310167291SKenneth E. Jansen if(genpar.lhs == 1) { 33410167291SKenneth E. Jansen get_time((duration), (duration+1)); 33510167291SKenneth E. Jansen ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY); 33610167291SKenneth E. Jansen ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY); 33710167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 33810167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 33910167291SKenneth E. Jansen (duration+1), (duration+3), 34010167291SKenneth E. Jansen "MatAssembly \0"); // char(0)) 34110167291SKenneth E. Jansen get_time(duration, (duration+1)); 34210167291SKenneth E. Jansen } 34310167291SKenneth E. Jansen if(firstpetsccall == 1) { 34410167291SKenneth E. Jansen ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol); 34510167291SKenneth E. Jansen // TODO Should this be LocalRow? LocalCol? or does LocalRow always == LocalCol 34610167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resP); 34710167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP); 34810167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &DyP); 34910167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP); 35010167291SKenneth E. Jansen } 35110167291SKenneth E. Jansen ierr = VecZeroEntries(resP); 35210167291SKenneth E. Jansen ierr = VecZeroEntries(DyP); 35310167291SKenneth E. Jansen if(firstpetsccall == 1) { 35410167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resPGlobal); 35510167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobal); 35610167291SKenneth E. Jansen //ierr = VecCreateSeq(PETSC_COMM_SELF, nshg*nflow, &DyPLocal); 35710167291SKenneth E. Jansen ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal); 35810167291SKenneth E. Jansen } 35910167291SKenneth E. Jansen ierr = VecZeroEntries(resPGlobal); 36010167291SKenneth E. Jansen // call VecZeroEntries(DyPLocal, ierr) 36110167291SKenneth E. Jansen 36210167291SKenneth E. Jansen PetscRow=0; 36310167291SKenneth E. Jansen k = 0; 36410167291SKenneth E. Jansen int index; 36510167291SKenneth E. Jansen for (i=0; i<nshg; i++ ){ 36610167291SKenneth E. Jansen for (j = 1; j<=nflow; j++){ 36710167291SKenneth E. Jansen index = i + (j-1)*nshg; 36810167291SKenneth E. Jansen valtoinsert = res[index]; 36910167291SKenneth E. Jansen if(workfc.numpe > 1) { 37010167291SKenneth E. Jansen PetscRow = (fncorp[i]-1)*nflow+(j-1); 37110167291SKenneth E. Jansen } 37210167291SKenneth E. Jansen else { 37310167291SKenneth E. Jansen PetscRow = i*nflow+(j-1); 37410167291SKenneth E. Jansen } 37510167291SKenneth E. Jansen assert(fncorp[i]<=nshgt); 37610167291SKenneth E. Jansen assert(fncorp[i]>0); 37710167291SKenneth E. Jansen assert(PetscRow>=0); 37810167291SKenneth E. Jansen assert(PetscRow<=nshgt*nflow); 37910167291SKenneth E. Jansen ierr = VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES); 38010167291SKenneth E. Jansen } 38110167291SKenneth E. Jansen } 38210167291SKenneth E. Jansen // printf("after VecSetValue loop %d\n",ierr); 38310167291SKenneth E. Jansen ierr = VecAssemblyBegin(resP); 38410167291SKenneth E. Jansen ierr = VecAssemblyEnd(resP); 38510167291SKenneth E. Jansen get_time((duration+2), (duration+3)); 38610167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 38710167291SKenneth E. Jansen (duration+1), (duration+3), 38810167291SKenneth E. Jansen "VectorWorkPre \0"); // char(0)) 38910167291SKenneth E. Jansen 39010167291SKenneth E. Jansen get_time((duration),(duration+1)); 39110167291SKenneth E. Jansen 39210167291SKenneth E. Jansen if(firstpetsccall == 1) { 39310167291SKenneth E. Jansen ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); 39410167291SKenneth E. Jansen ierr = KSPSetOperators(ksp, lhsP, lhsP); 39510167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksp, lhsP, lhsP, SAME_NONZERO_PATTERN); 39610167291SKenneth E. Jansen ierr = KSPGetPC(ksp, &pc); 39710167291SKenneth E. Jansen ierr = PCSetType(pc, PCPBJACOBI); 39810167291SKenneth E. Jansen PetscInt maxits; 39910167291SKenneth E. Jansen maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 40010167291SKenneth E. Jansen //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 40110167291SKenneth E. Jansen ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 40210167291SKenneth E. Jansen ierr = KSPSetFromOptions(ksp); 40310167291SKenneth E. Jansen } 40410167291SKenneth E. Jansen ierr = KSPSolve(ksp, resP, DyP); 40510167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 40610167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 40710167291SKenneth E. Jansen (duration+1), (duration+3), 40810167291SKenneth E. Jansen "KSPSolve \0"); // char(0)) 40910167291SKenneth E. Jansen //if(myrank .eq. 0) then 41010167291SKenneth E. Jansen //!write(*,"(A, E10.3)") "KSPSolve ", elapsed 41110167291SKenneth E. Jansen //end if 41210167291SKenneth E. Jansen get_time((duration),(duration+1)); 41310167291SKenneth E. Jansen if(firstpetsccall == 1) { 41410167291SKenneth E. Jansen ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, GlobalIndexSet, &scatter7); 41510167291SKenneth E. Jansen } 41610167291SKenneth E. Jansen ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 41710167291SKenneth E. Jansen ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 41810167291SKenneth E. Jansen ierr = VecGetArray(DyPLocal, &parray); 41910167291SKenneth E. Jansen PetscRow = 0; 42010167291SKenneth E. Jansen for ( node = 0; node< nshg; node++) { 42110167291SKenneth E. Jansen for (var = 1; var<= nflow; var++) { 42210167291SKenneth E. Jansen index = node + (var-1)*nshg; 42310167291SKenneth E. Jansen Dy[index] = parray[PetscRow]; 42410167291SKenneth E. Jansen PetscRow = PetscRow+1; 42510167291SKenneth E. Jansen } 42610167291SKenneth E. Jansen } 42710167291SKenneth E. Jansen ierr = VecRestoreArray(DyPLocal, &parray); 42810167291SKenneth E. Jansen 42910167291SKenneth E. Jansen firstpetsccall = 0; 43010167291SKenneth E. Jansen // later timer ('Solver '); 43110167291SKenneth E. Jansen 43210167291SKenneth E. Jansen // .... output the statistics 43310167291SKenneth E. Jansen // 43410167291SKenneth E. Jansen itrpar.iKs=0; // see rstat() 43510167291SKenneth E. Jansen PetscInt its; 43610167291SKenneth E. Jansen //ierr = KSPGetIterationNumber(ksp, &itrpar.iKs); 43710167291SKenneth E. Jansen ierr = KSPGetIterationNumber(ksp, &its); 43810167291SKenneth E. Jansen itrpar.iKs = (int) its; 43910167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 44010167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 44110167291SKenneth E. Jansen (duration+1), (duration+3), 44210167291SKenneth E. Jansen "solWork \0"); // char(0)) 44310167291SKenneth E. Jansen itrpar.ntotGM += (int) its; 44410167291SKenneth E. Jansen rstat (res, ilwork); 44510167291SKenneth E. Jansen // 44610167291SKenneth E. Jansen // .... stop the timer 44710167291SKenneth E. Jansen // 44810167291SKenneth E. Jansen // 3002 continue ! no solve just res. 44910167291SKenneth E. Jansen // later call timer ('Back ') 45010167291SKenneth E. Jansen // 45110167291SKenneth E. Jansen // .... end 45210167291SKenneth E. Jansen // 45310167291SKenneth E. Jansen } 45410167291SKenneth E. Jansen 45510167291SKenneth E. Jansen void SolGMRpSclr(double* y, double* ac, 45610167291SKenneth E. Jansen double* x, double* elDw, int* iBC, double* BC, 45710167291SKenneth E. Jansen int* col, int* row, 45810167291SKenneth E. Jansen int* iper, 45910167291SKenneth E. Jansen int* ilwork, double* shp, double* shgl, double* shpb, 46010167291SKenneth E. Jansen double* shglb, double* res, double* Dy, long long int* fncorp) 46110167291SKenneth E. Jansen { 46210167291SKenneth E. Jansen // 46310167291SKenneth E. Jansen // ---------------------------------------------------------------------- 46410167291SKenneth E. Jansen // 46510167291SKenneth E. Jansen // This is the preconditioned GMRES driver routine. 46610167291SKenneth E. Jansen // 46710167291SKenneth E. Jansen // input: 46810167291SKenneth E. Jansen // y (nshg,ndof) : Y-variables at n+alpha_v 46910167291SKenneth E. Jansen // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 47010167291SKenneth E. Jansen // yold (nshg,ndof) : Y-variables at beginning of step 47110167291SKenneth E. Jansen // acold (nshg,ndof) : Primvar. accel. variable at begng step 47210167291SKenneth E. Jansen // x (numnp,nsd) : node coordinates 47310167291SKenneth E. Jansen // iBC (nshg) : BC codes 47410167291SKenneth E. Jansen // BC (nshg,ndofBC) : BC constraint parameters 47510167291SKenneth E. Jansen // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 47610167291SKenneth E. Jansen // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 47710167291SKenneth E. Jansen // 47810167291SKenneth E. Jansen // ---------------------------------------------------------------------- 47910167291SKenneth E. Jansen // 48010167291SKenneth E. Jansen 48110167291SKenneth E. Jansen 48210167291SKenneth E. Jansen // Get variables from common_c.h 48310167291SKenneth E. Jansen int nshg, nflow, nsd, iownnodes; 48410167291SKenneth E. Jansen nshg = conpar.nshg; 48510167291SKenneth E. Jansen nsd = NSD; 48610167291SKenneth E. Jansen iownnodes = conpar.iownnodes; 48710167291SKenneth E. Jansen 48810167291SKenneth E. Jansen gcorp_t nshgt; 48910167291SKenneth E. Jansen gcorp_t mbeg; 49010167291SKenneth E. Jansen gcorp_t mend; 49110167291SKenneth E. Jansen nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 49210167291SKenneth E. Jansen mbeg = (gcorp_t) newdim.minowned; 49310167291SKenneth E. Jansen mend = (gcorp_t) newdim.maxowned; 49410167291SKenneth E. Jansen 49510167291SKenneth E. Jansen 49610167291SKenneth E. Jansen int node, element, var, eqn; 49710167291SKenneth E. Jansen double valtoinsert; 49810167291SKenneth E. Jansen int nenl, iel, lelCat, lcsyst, iorder, nshl; 49910167291SKenneth E. Jansen int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 50010167291SKenneth E. Jansen double DyFlats[nshg]; 50110167291SKenneth E. Jansen double DyFlatPhastas[nshg]; 50210167291SKenneth E. Jansen double rmes[nshg]; 50310167291SKenneth E. Jansen // DEBUG 50410167291SKenneth E. Jansen int i,j,k,l,m; 50510167291SKenneth E. Jansen PetscInt indexsetarys[nshg]; 50610167291SKenneth E. Jansen PetscInt gblindexsetarys[nshg]; 50710167291SKenneth E. Jansen PetscInt nodetoinsert; 50810167291SKenneth E. Jansen 50910167291SKenneth E. Jansen double real_rtol, real_abstol, real_dtol; 51010167291SKenneth E. Jansen double *parray; 51110167291SKenneth E. Jansen PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 51210167291SKenneth E. Jansen PetscInt petsc_n; 51310167291SKenneth E. Jansen PetscOne = 1; 51410167291SKenneth E. Jansen uint64_t duration[4]; 51510167291SKenneth E. Jansen 51610167291SKenneth E. Jansen 51710167291SKenneth E. Jansen // 51810167291SKenneth E. Jansen // 51910167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<****************** 52010167291SKenneth E. Jansen // 52110167291SKenneth E. Jansen // 52210167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations 52310167291SKenneth E. Jansen // 52410167291SKenneth E. Jansen // 52510167291SKenneth E. Jansen genpar.idflx = 0 ; 52610167291SKenneth E. Jansen if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 52710167291SKenneth E. Jansen if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 52810167291SKenneth E. Jansen 52910167291SKenneth E. Jansen 53010167291SKenneth E. Jansen 53110167291SKenneth E. Jansen PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 53210167291SKenneth E. Jansen PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 53310167291SKenneth E. Jansen gcorp_t glbNZ; 53410167291SKenneth E. Jansen 53510167291SKenneth E. Jansen if(firstpetsccalls == 1) { 53610167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 53710167291SKenneth E. Jansen idiagnz[i]=0; 53810167291SKenneth E. Jansen iodiagnz[i]=0; 53910167291SKenneth E. Jansen } 54010167291SKenneth E. Jansen i=0; 54110167291SKenneth E. Jansen for(k=0;k<nshg;k++) { 54210167291SKenneth E. Jansen if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 54310167291SKenneth E. Jansen // this node is not owned by this rank so we skip 54410167291SKenneth E. Jansen } else { 54510167291SKenneth E. Jansen for(j=col[i]-1;j<col[i+1]-1;j++) { 54610167291SKenneth E. Jansen glbNZ=fncorp[row[j]]; 54710167291SKenneth E. Jansen if((glbNZ < mbeg) || (glbNZ > mend)) { 54810167291SKenneth E. Jansen iodiagnz[i]++; 54910167291SKenneth E. Jansen } else { 55010167291SKenneth E. Jansen idiagnz[i]++; 55110167291SKenneth E. Jansen } 55210167291SKenneth E. Jansen } 55310167291SKenneth E. Jansen i++; 55410167291SKenneth E. Jansen } 55510167291SKenneth E. Jansen } 55610167291SKenneth E. Jansen gcorp_t mind=1000; 55710167291SKenneth E. Jansen gcorp_t mino=1000; 55810167291SKenneth E. Jansen gcorp_t maxd=0; 55910167291SKenneth E. Jansen gcorp_t maxo=0; 56010167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 56110167291SKenneth E. Jansen mind=min(mind,idiagnz[i]); 56210167291SKenneth E. Jansen mino=min(mino,iodiagnz[i]); 56310167291SKenneth E. Jansen maxd=max(maxd,idiagnz[i]); 56410167291SKenneth E. Jansen maxo=max(maxo,iodiagnz[i]); 56510167291SKenneth E. Jansen } 56610167291SKenneth E. Jansen 56710167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 56810167291SKenneth E. Jansen iodiagnz[i]=1.3*maxd; 56910167291SKenneth E. Jansen idiagnz[i]=1.3*maxd; 57010167291SKenneth E. Jansen } 57110167291SKenneth E. Jansen 57210167291SKenneth E. Jansen 57310167291SKenneth E. Jansen 57410167291SKenneth E. Jansen } 57510167291SKenneth E. Jansen if(firstpetsccalls == 1) { 57610167291SKenneth E. Jansen 57710167291SKenneth E. Jansen petsc_m = (PetscInt) iownnodes; 57810167291SKenneth E. Jansen petsc_M = (PetscInt) nshgt; 57910167291SKenneth E. Jansen petsc_PA = (PetscInt) 40; 58010167291SKenneth E. Jansen 58110167291SKenneth E. Jansen ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M, 58210167291SKenneth E. Jansen 0, idiagnz, 0, iodiagnz, &lhsPs); 58310167291SKenneth E. Jansen 58410167291SKenneth E. Jansen ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 58510167291SKenneth E. Jansen 58610167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 587*dcce770dSKenneth E. Jansen #ifdef JEDBROWN 58810167291SKenneth E. Jansen ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 589*dcce770dSKenneth E. Jansen #endif 59010167291SKenneth E. Jansen ierr = MatSetUp(lhsPs); 59110167291SKenneth E. Jansen 59210167291SKenneth E. Jansen PetscInt myMatStart, myMatEnd; 59310167291SKenneth E. Jansen ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 59410167291SKenneth E. Jansen } 59510167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 59610167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 59710167291SKenneth E. Jansen if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs); 59810167291SKenneth E. Jansen 59910167291SKenneth E. Jansen get_time(duration, (duration+1)); 60010167291SKenneth E. Jansen 60110167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 60210167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before elmgmr \n"); 60310167291SKenneth E. Jansen // for (i=0; i<nshg ; i++) { 60410167291SKenneth E. Jansen // assert(fncorp[i]>0); 60510167291SKenneth E. Jansen // } 60610167291SKenneth E. Jansen 60710167291SKenneth E. Jansen ElmGMRPETScSclr(y, ac, x, elDw, 60810167291SKenneth E. Jansen shp, shgl, iBC, 60910167291SKenneth E. Jansen BC, shpb, 61010167291SKenneth E. Jansen shglb, res, 61110167291SKenneth E. Jansen rmes, iper, 61210167291SKenneth E. Jansen ilwork, &lhsPs); 61310167291SKenneth E. Jansen if(firstpetsccalls == 1) { 61410167291SKenneth E. Jansen // Setup IndexSet. For now, we mimic vector insertion procedure 61510167291SKenneth E. Jansen // Since we always reference by global indexes this doesn't matter 61610167291SKenneth E. Jansen // except for cache performance) 61710167291SKenneth E. Jansen // TODO: Better arrangment? 61810167291SKenneth E. Jansen nodetoinsert = 0; 61910167291SKenneth E. Jansen k=0; 62010167291SKenneth E. Jansen if(workfc.numpe > 1) { 62110167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 62210167291SKenneth E. Jansen nodetoinsert = fncorp[i]-1; 62310167291SKenneth E. Jansen indexsetarys[k] = nodetoinsert; 62410167291SKenneth E. Jansen k = k+1; 62510167291SKenneth E. Jansen } 62610167291SKenneth E. Jansen } 62710167291SKenneth E. Jansen else { 62810167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 62910167291SKenneth E. Jansen nodetoinsert = i; 63010167291SKenneth E. Jansen indexsetarys[k] = nodetoinsert; 63110167291SKenneth E. Jansen k = k+1; 63210167291SKenneth E. Jansen } 63310167291SKenneth E. Jansen } 63410167291SKenneth E. Jansen 63510167291SKenneth E. Jansen for (i=0; i<nshg; i++) { 63610167291SKenneth E. Jansen gblindexsetarys[i] = i ; //TODO: better option for performance? 63710167291SKenneth E. Jansen } 63810167291SKenneth E. Jansen // Create Vector Index Maps 63910167291SKenneth E. Jansen petsc_n = (PetscInt) nshg; 64010167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys, 64110167291SKenneth E. Jansen PETSC_COPY_VALUES, &LocalIndexSets); 64210167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetarys, 64310167291SKenneth E. Jansen PETSC_COPY_VALUES, &GlobalIndexSets); 64410167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSets,&VectorMappings); 64510167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSets,&GblVectorMappings); 64610167291SKenneth E. Jansen } 64710167291SKenneth E. Jansen if(genpar.lhs ==1) { 64810167291SKenneth E. Jansen ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY); 64910167291SKenneth E. Jansen ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY); 65010167291SKenneth E. Jansen } 65110167291SKenneth E. Jansen if(firstpetsccalls == 1) { 65210167291SKenneth E. Jansen ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol); 65310167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs); 65410167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs); 65510167291SKenneth E. Jansen } 65610167291SKenneth E. Jansen ierr = VecZeroEntries(resPs); 65710167291SKenneth E. Jansen ierr = VecZeroEntries(DyPs); 65810167291SKenneth E. Jansen if(firstpetsccalls == 1) { 65910167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobals); 66010167291SKenneth E. Jansen ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals); 66110167291SKenneth E. Jansen } 66210167291SKenneth E. Jansen ierr = VecZeroEntries(resPGlobals); 66310167291SKenneth E. Jansen 66410167291SKenneth E. Jansen PetscRow=0; 66510167291SKenneth E. Jansen k = 0; 66610167291SKenneth E. Jansen int index; 66710167291SKenneth E. Jansen for (i=0; i<nshg; i++ ){ 66810167291SKenneth E. Jansen valtoinsert = res[i]; 66910167291SKenneth E. Jansen if(workfc.numpe > 1) { 67010167291SKenneth E. Jansen PetscRow = (fncorp[i]-1); 67110167291SKenneth E. Jansen } 67210167291SKenneth E. Jansen else { 67310167291SKenneth E. Jansen PetscRow = i-1; 67410167291SKenneth E. Jansen } 67510167291SKenneth E. Jansen assert(fncorp[i]<=nshgt); 67610167291SKenneth E. Jansen assert(fncorp[i]>0); 67710167291SKenneth E. Jansen assert(PetscRow>=0); 67810167291SKenneth E. Jansen assert(PetscRow<=nshgt); 67910167291SKenneth E. Jansen ierr = VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES); 68010167291SKenneth E. Jansen } 68110167291SKenneth E. Jansen // printf("after VecSetValue loop %d\n",ierr); 68210167291SKenneth E. Jansen ierr = VecAssemblyBegin(resPs); 68310167291SKenneth E. Jansen ierr = VecAssemblyEnd(resPs); 68410167291SKenneth E. Jansen 68510167291SKenneth E. Jansen if(firstpetsccalls == 1) { 68610167291SKenneth E. Jansen ierr = KSPCreate(PETSC_COMM_WORLD, &ksps); 68710167291SKenneth E. Jansen ierr = KSPSetOperators(ksps, lhsPs, lhsPs); 68810167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN); 68910167291SKenneth E. Jansen ierr = KSPGetPC(ksps, &pcs); 69010167291SKenneth E. Jansen ierr = PCSetType(pcs, PCPBJACOBI); 69110167291SKenneth E. Jansen PetscInt maxits; 69210167291SKenneth E. Jansen maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 69310167291SKenneth E. Jansen //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 69410167291SKenneth E. Jansen ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 69510167291SKenneth E. Jansen ierr = KSPSetFromOptions(ksps); 69610167291SKenneth E. Jansen } 69710167291SKenneth E. Jansen ierr = KSPSolve(ksps, resPs, DyPs); 69810167291SKenneth E. Jansen if(firstpetsccalls == 1) { 69910167291SKenneth E. Jansen ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, GlobalIndexSets, &scatter7s); 70010167291SKenneth E. Jansen } 70110167291SKenneth E. Jansen ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 70210167291SKenneth E. Jansen ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 70310167291SKenneth E. Jansen ierr = VecGetArray(DyPLocals, &parray); 70410167291SKenneth E. Jansen PetscRow = 0; 70510167291SKenneth E. Jansen for ( node = 0; node< nshg; node++) { 70610167291SKenneth E. Jansen index = node; 70710167291SKenneth E. Jansen Dy[index] = parray[PetscRow]; 70810167291SKenneth E. Jansen PetscRow = PetscRow+1; 70910167291SKenneth E. Jansen } 71010167291SKenneth E. Jansen ierr = VecRestoreArray(DyPLocals, &parray); 71110167291SKenneth E. Jansen 71210167291SKenneth E. Jansen firstpetsccalls = 0; 71310167291SKenneth E. Jansen 71410167291SKenneth E. Jansen // .... output the statistics 71510167291SKenneth E. Jansen // 71610167291SKenneth E. Jansen // itrpar.iKss=0; // see rstat() 71710167291SKenneth E. Jansen PetscInt its; 71810167291SKenneth E. Jansen ierr = KSPGetIterationNumber(ksps, &its); 71910167291SKenneth E. Jansen itrpar.iKss = (int) its; 72010167291SKenneth E. Jansen itrpar.ntotGMs += (int) its; 72110167291SKenneth E. Jansen rstatSclr (res, ilwork); 72210167291SKenneth E. Jansen // 72310167291SKenneth E. Jansen // .... end 72410167291SKenneth E. Jansen // 72510167291SKenneth E. Jansen } 72617860365SKenneth E. Jansen #endif 727