117860365SKenneth E. Jansen #ifdef HAVE_PETSC 210167291SKenneth E. Jansen #include <stdio.h> 310167291SKenneth E. Jansen 410167291SKenneth E. Jansen #include "petscsys.h" 510167291SKenneth E. Jansen #include "petscvec.h" 610167291SKenneth E. Jansen #include "petscmat.h" 710167291SKenneth E. Jansen #include "petscpc.h" 810167291SKenneth E. Jansen #include "petscksp.h" 910167291SKenneth E. Jansen 1010167291SKenneth E. Jansen #include "assert.h" 1110167291SKenneth E. Jansen #include "common_c.h" 1210167291SKenneth E. Jansen 1310167291SKenneth E. Jansen #include <FCMangle.h> 1410167291SKenneth E. Jansen #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP) 1510167291SKenneth E. Jansen #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR) 1610167291SKenneth E. Jansen #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC) 1710167291SKenneth E. Jansen #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR) 1810167291SKenneth E. Jansen #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT) 1910167291SKenneth E. Jansen #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR) 2010167291SKenneth E. Jansen #define min(a,b) (((a) < (b)) ? (a) : (b)) 2110167291SKenneth E. Jansen #define max(a,b) (((a) > (b)) ? (a) : (b)) 2210167291SKenneth E. Jansen #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME) 2310167291SKenneth E. Jansen #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF) 2410167291SKenneth E. Jansen 2510167291SKenneth E. Jansen typedef long long int gcorp_t; 2610167291SKenneth E. Jansen 2710167291SKenneth E. Jansen void get_time(uint64_t* rv, uint64_t* cycle); 2810167291SKenneth E. Jansen void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl); 2910167291SKenneth E. Jansen 3010167291SKenneth E. Jansen //#include "auxmpi.h" 3110167291SKenneth E. Jansen // static PetscOffset poff; 3210167291SKenneth E. Jansen 3310167291SKenneth E. Jansen static Mat lhsP; 3410167291SKenneth E. Jansen static PC pc; 3510167291SKenneth E. Jansen static KSP ksp; 36*0be30ed5SKenneth E. Jansen static Vec DyP, resP, DyPLocal; 3710167291SKenneth E. Jansen static PetscErrorCode ierr; 3810167291SKenneth E. Jansen static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol; 3910167291SKenneth E. Jansen static IS LocalIndexSet, GlobalIndexSet; 4010167291SKenneth E. Jansen static ISLocalToGlobalMapping VectorMapping; 4110167291SKenneth E. Jansen static ISLocalToGlobalMapping GblVectorMapping; 4210167291SKenneth E. Jansen static VecScatter scatter7; 4310167291SKenneth E. Jansen static int firstpetsccall = 1; 4410167291SKenneth E. Jansen 4510167291SKenneth E. Jansen static Mat lhsPs; 4610167291SKenneth E. Jansen static PC pcs; 4710167291SKenneth E. Jansen static KSP ksps; 48*0be30ed5SKenneth E. Jansen static Vec DyPs, resPs, DyPLocals; 4910167291SKenneth E. Jansen static IS LocalIndexSets, GlobalIndexSets; 5010167291SKenneth E. Jansen static ISLocalToGlobalMapping VectorMappings; 5110167291SKenneth E. Jansen static ISLocalToGlobalMapping GblVectorMappings; 5210167291SKenneth E. Jansen static VecScatter scatter7s; 5310167291SKenneth E. Jansen static int firstpetsccalls = 1; 5410167291SKenneth E. Jansen 55175e1b6bSKenneth E. Jansen static int rankdump=-1; // 8121 was the problem rank with 3.5.3 56175e1b6bSKenneth E. Jansen 57175e1b6bSKenneth E. Jansen 5810167291SKenneth E. Jansen void SolGMRp(double* y, double* ac, double* yold, 5910167291SKenneth E. Jansen double* x, int* iBC, double* BC, 6010167291SKenneth E. Jansen int* col, int* row, double* lhsk, 6110167291SKenneth E. Jansen double* res, double* BDiag, 6210167291SKenneth E. Jansen int* iper, 6310167291SKenneth E. Jansen int* ilwork, double* shp, double* shgl, double* shpb, 6410167291SKenneth E. Jansen double* shglb, double* Dy, double* rerr, long long int* fncorp) 6510167291SKenneth E. Jansen { 6610167291SKenneth E. Jansen // 6710167291SKenneth E. Jansen // ---------------------------------------------------------------------- 6810167291SKenneth E. Jansen // 6910167291SKenneth E. Jansen // This is the preconditioned GMRES driver routine. 7010167291SKenneth E. Jansen // 7110167291SKenneth E. Jansen // input: 7210167291SKenneth E. Jansen // y (nshg,ndof) : Y-variables at n+alpha_v 7310167291SKenneth E. Jansen // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 7410167291SKenneth E. Jansen // yold (nshg,ndof) : Y-variables at beginning of step 7510167291SKenneth E. Jansen // acold (nshg,ndof) : Primvar. accel. variable at begng step 7610167291SKenneth E. Jansen // x (numnp,nsd) : node coordinates 7710167291SKenneth E. Jansen // iBC (nshg) : BC codes 7810167291SKenneth E. Jansen // BC (nshg,ndofBC) : BC constraint parameters 7910167291SKenneth E. Jansen // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 8010167291SKenneth E. Jansen // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 8110167291SKenneth E. Jansen // 8210167291SKenneth E. Jansen // output: 8310167291SKenneth E. Jansen // res (nshg,nflow) : preconditioned residual 8410167291SKenneth E. Jansen // BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 8510167291SKenneth E. Jansen // 8610167291SKenneth E. Jansen // 8710167291SKenneth E. Jansen // Zdenek Johan, Winter 1991. (Fortran 90) 8810167291SKenneth E. Jansen // ---------------------------------------------------------------------- 8910167291SKenneth E. Jansen // 9010167291SKenneth E. Jansen 9110167291SKenneth E. Jansen 9210167291SKenneth E. Jansen // Get variables from common_c.h 9310167291SKenneth E. Jansen int nshg, nflow, nsd, iownnodes; 9410167291SKenneth E. Jansen nshg = conpar.nshg; 9510167291SKenneth E. Jansen nflow = conpar.nflow; 9610167291SKenneth E. Jansen nsd = NSD; 9710167291SKenneth E. Jansen iownnodes = conpar.iownnodes; 9810167291SKenneth E. Jansen 9910167291SKenneth E. Jansen gcorp_t nshgt; 10010167291SKenneth E. Jansen gcorp_t mbeg; 10110167291SKenneth E. Jansen gcorp_t mend; 10210167291SKenneth E. Jansen nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 10310167291SKenneth E. Jansen mbeg = (gcorp_t) newdim.minowned; 10410167291SKenneth E. Jansen mend = (gcorp_t) newdim.maxowned; 10510167291SKenneth E. Jansen 10610167291SKenneth E. Jansen 10710167291SKenneth E. Jansen int node, element, var, eqn; 10810167291SKenneth E. Jansen double valtoinsert; 10910167291SKenneth E. Jansen int nenl, iel, lelCat, lcsyst, iorder, nshl; 11010167291SKenneth E. Jansen int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 11110167291SKenneth E. Jansen double DyFlat[nshg*nflow]; 11210167291SKenneth E. Jansen double DyFlatPhasta[nshg*nflow]; 11310167291SKenneth E. Jansen double rmes[nshg*nflow]; 11410167291SKenneth E. Jansen // DEBUG 11510167291SKenneth E. Jansen int i,j,k,l,m; 11610167291SKenneth E. Jansen 11710167291SKenneth E. Jansen // FIXME: PetscScalar 11810167291SKenneth E. Jansen double real_rtol, real_abstol, real_dtol; 11910167291SKenneth E. Jansen // /DEBUG 12010167291SKenneth E. Jansen //double parray[1]; // Should be a PetscScalar 12110167291SKenneth E. Jansen double *parray; // Should be a PetscScalar 12210167291SKenneth E. Jansen PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 12310167291SKenneth E. Jansen PetscInt petsc_n; 12410167291SKenneth E. Jansen PetscOne = 1; 12510167291SKenneth E. Jansen uint64_t duration[4]; 12610167291SKenneth E. Jansen 12710167291SKenneth E. Jansen // printf("%g %g %g \n", *rerr, *(rerr+1) , *(rerr+2)); 12810167291SKenneth E. Jansen 12910167291SKenneth E. Jansen if(firstpetsccall == 1) { 13010167291SKenneth E. Jansen /* call get_time(duration(1),duration(2)); 13110167291SKenneth E. Jansen call system('sleep 10'); 13210167291SKenneth E. Jansen call get_time(duration(3),duration(4)); 13310167291SKenneth E. Jansen call get_max_time_diff(duration(1), duration(3), 13410167291SKenneth E. Jansen duration(2), duration(4), 13510167291SKenneth E. Jansen "TenSecondSleep " // char(0)) 13610167291SKenneth E. Jansen if(myrank .eq. 0) { 13710167291SKenneth E. Jansen !write(*,"(A, E10.3)") "TenSecondSleep ", elapsed 13810167291SKenneth E. Jansen } 13910167291SKenneth E. Jansen */ 14010167291SKenneth E. Jansen } 14110167291SKenneth E. Jansen // 14210167291SKenneth E. Jansen // 14310167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<****************** 14410167291SKenneth E. Jansen // 14510167291SKenneth E. Jansen // 14610167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations 14710167291SKenneth E. Jansen // 14810167291SKenneth E. Jansen // 14910167291SKenneth E. Jansen genpar.idflx = 0 ; 15010167291SKenneth E. Jansen if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 15110167291SKenneth E. Jansen if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 15210167291SKenneth E. Jansen 15310167291SKenneth E. Jansen 15410167291SKenneth E. Jansen 15510167291SKenneth E. Jansen gcorp_t glbNZ; 15610167291SKenneth E. Jansen 15710167291SKenneth E. Jansen if(firstpetsccall == 1) { 158f4d0b58bSKenneth E. Jansen PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 159f4d0b58bSKenneth E. Jansen PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 16010167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 16110167291SKenneth E. Jansen idiagnz[i]=0; 16210167291SKenneth E. Jansen iodiagnz[i]=0; 16310167291SKenneth E. Jansen } 16410167291SKenneth E. Jansen i=0; 16510167291SKenneth E. Jansen for(k=0;k<nshg;k++) { 16610167291SKenneth E. Jansen if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 16710167291SKenneth E. Jansen // this node is not owned by this rank so we skip 16810167291SKenneth E. Jansen } else { 16910167291SKenneth E. Jansen for(j=col[i]-1;j<col[i+1]-1;j++) { 17079f1763eSKenneth E. Jansen // assert(row[j]<=nshg); 17179f1763eSKenneth E. Jansen // assert(fncorp[row[j]-1]<=nshgt); 17279f1763eSKenneth E. Jansen glbNZ=fncorp[row[j]-1]; 17310167291SKenneth E. Jansen if((glbNZ < mbeg) || (glbNZ > mend)) { 17410167291SKenneth E. Jansen iodiagnz[i]++; 17510167291SKenneth E. Jansen } else { 17610167291SKenneth E. Jansen idiagnz[i]++; 17710167291SKenneth E. Jansen } 17810167291SKenneth E. Jansen } 17910167291SKenneth E. Jansen i++; 18010167291SKenneth E. Jansen } 18110167291SKenneth E. Jansen } 18210167291SKenneth E. Jansen gcorp_t mind=1000; 18310167291SKenneth E. Jansen gcorp_t mino=1000; 18410167291SKenneth E. Jansen gcorp_t maxd=0; 18510167291SKenneth E. Jansen gcorp_t maxo=0; 18610167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 18710167291SKenneth E. Jansen mind=min(mind,idiagnz[i]); 18810167291SKenneth E. Jansen mino=min(mino,iodiagnz[i]); 18910167291SKenneth E. Jansen maxd=max(maxd,idiagnz[i]); 19010167291SKenneth E. Jansen maxo=max(maxo,iodiagnz[i]); 19110167291SKenneth E. Jansen // iodiagnz[i]=max(iodiagnz[i],10); 19210167291SKenneth E. Jansen // idiagnz[i]=max(idiagnz[i],10); 19310167291SKenneth E. Jansen // iodiagnz[i]=2*iodiagnz[i]; // estimate a bit higher for off-part interactions 19410167291SKenneth E. Jansen // idiagnz[i]=2*idiagnz[i]; // estimate a bit higher for off-part interactions 19510167291SKenneth E. Jansen } 19610167291SKenneth E. Jansen // the above was pretty good but below is faster and not too much more memory...of course once you do this 19710167291SKenneth E. Jansen // could just use the constant fill parameters in create but keep it alive for potential later optimization 19810167291SKenneth E. Jansen 19910167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 20010167291SKenneth E. Jansen iodiagnz[i]=1.3*maxd; 20110167291SKenneth E. Jansen idiagnz[i]=1.3*maxd; 20210167291SKenneth E. Jansen } 20310167291SKenneth E. Jansen 20410167291SKenneth E. Jansen 20510167291SKenneth E. Jansen 20610167291SKenneth E. Jansen if(workfc.numpe < 200){ 20710167291SKenneth E. Jansen printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 20810167291SKenneth E. Jansen printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo); 20910167291SKenneth E. Jansen } 21010167291SKenneth E. Jansen // Print debug info 21110167291SKenneth E. Jansen if(nshgt < 200){ 21210167291SKenneth E. Jansen int irank; 21310167291SKenneth E. Jansen for(irank=0;irank<workfc.numpe;irank++) { 21410167291SKenneth E. Jansen if(irank == workfc.myrank){ 21510167291SKenneth E. Jansen printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank); 21610167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 21710167291SKenneth E. Jansen printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]); 21810167291SKenneth E. Jansen } 21910167291SKenneth E. Jansen } 22010167291SKenneth E. Jansen MPI_Barrier(MPI_COMM_WORLD); 22110167291SKenneth E. Jansen } 22210167291SKenneth E. Jansen } 223f4d0b58bSKenneth E. Jansen // } 224f4d0b58bSKenneth E. Jansen // if(firstpetsccall == 1) { 22510167291SKenneth E. Jansen 22610167291SKenneth E. Jansen petsc_bs = (PetscInt) nflow; 22710167291SKenneth E. Jansen petsc_m = (PetscInt) nflow* (PetscInt) iownnodes; 22810167291SKenneth E. Jansen petsc_M = (PetscInt) nshgt * (PetscInt) nflow; 22910167291SKenneth E. Jansen petsc_PA = (PetscInt) 40; 23010167291SKenneth E. Jansen // TODO: fixe nshgt for 64 bit integer!!!!! 23110167291SKenneth E. Jansen 23210167291SKenneth E. Jansen //ierr = MatCreateBAIJ(PETSC_COMM_WORLD, nflow, nflow*iownnodes, 23310167291SKenneth E. Jansen // nflow*iownnodes, nshgt*nflow, nshgt*nflow, 0, 23410167291SKenneth E. Jansen 23510167291SKenneth E. Jansen // this has no pre-allocate ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 23610167291SKenneth E. Jansen // this has no pre-allocate 0, NULL, 0, NULL, &lhsP); 23710167291SKenneth E. Jansen 23810167291SKenneth E. Jansen // next 2 do pre-allocate 23910167291SKenneth E. Jansen 24010167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 24110167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before matCreateBAIJ petsc_bs,petsc_m,petsc_M,petsc_PA %ld %ld %ld %ld \n",petsc_bs,petsc_m,petsc_M,petsc_PA); 24210167291SKenneth E. Jansen 24310167291SKenneth E. Jansen ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M, 24410167291SKenneth E. Jansen 0, idiagnz, 0, iodiagnz, &lhsP); 24510167291SKenneth E. Jansen 24610167291SKenneth E. Jansen // petsc_PA, NULL, petsc_PA, NULL, &lhsP); 24710167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 24810167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before matSetOoption 1 \n"); 24910167291SKenneth E. Jansen ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 25010167291SKenneth E. Jansen 25110167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 252175e1b6bSKenneth E. Jansen #ifdef HIDEJEDBROWN 25310167291SKenneth E. Jansen ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 254dcce770dSKenneth E. Jansen #endif 25510167291SKenneth E. Jansen ierr = MatSetUp(lhsP); 25610167291SKenneth E. Jansen 25710167291SKenneth E. Jansen PetscInt myMatStart, myMatEnd; 25810167291SKenneth E. Jansen ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd); 259175e1b6bSKenneth E. Jansen //debug 260175e1b6bSKenneth E. Jansen if(workfc.myrank == rankdump) printf("Flow myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 26110167291SKenneth E. Jansen } 26210167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 26310167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 26410167291SKenneth E. Jansen if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP); 26510167291SKenneth E. Jansen 26610167291SKenneth E. Jansen get_time(duration, (duration+1)); 26710167291SKenneth E. Jansen 26810167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 26910167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before elmgmr \n"); 27010167291SKenneth E. Jansen // for (i=0; i<nshg ; i++) { 27110167291SKenneth E. Jansen // assert(fncorp[i]>0); 27210167291SKenneth E. Jansen // } 27310167291SKenneth E. Jansen 27410167291SKenneth E. Jansen ElmGMRPETSc(y, ac, x, 27510167291SKenneth E. Jansen shp, shgl, iBC, 27610167291SKenneth E. Jansen BC, shpb, 27710167291SKenneth E. Jansen shglb, res, 27810167291SKenneth E. Jansen rmes, iper, 27910167291SKenneth E. Jansen ilwork, rerr , &lhsP); 280*0be30ed5SKenneth E. Jansen // Below was just to confirm that what is in rstat is the res or b vector given to PETSc 281*0be30ed5SKenneth E. Jansen // rstat (res, ilwork); 282*0be30ed5SKenneth E. Jansen // if(workfc.myrank ==0) printf("residual after ElmGMRPETSc\n"); 28310167291SKenneth E. Jansen get_time((duration+2), (duration+3)); 28410167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 28510167291SKenneth E. Jansen (duration+1), (duration+3), 28610167291SKenneth E. Jansen "ElmGMRPETSc \0"); // char(0)) 28710167291SKenneth E. Jansen // printf("after ElmGMRPETSc\n"); 28810167291SKenneth E. Jansen if(firstpetsccall == 1) { 28910167291SKenneth E. Jansen // Setup IndexSet. For now, we mimic vector insertion procedure 29010167291SKenneth E. Jansen // Since we always reference by global indexes this doesn't matter 29110167291SKenneth E. Jansen // except for cache performance) 29210167291SKenneth E. Jansen // TODO: Better arrangment? 2938636783dSKenneth E. Jansen //PetscInt indexsetary[nflow*nshg]; 2948636783dSKenneth E. Jansen PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg); 2958636783dSKenneth E. Jansen PetscInt* gblindexsetary = malloc(sizeof(PetscInt)*nflow*nshg); 2968636783dSKenneth E. Jansen PetscInt nodetoinsert; 29710167291SKenneth E. Jansen nodetoinsert = 0; 29810167291SKenneth E. Jansen k=0; 299175e1b6bSKenneth E. Jansen if(workfc.myrank == rankdump) { 300175e1b6bSKenneth E. Jansen printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 301175e1b6bSKenneth E. Jansen printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 302175e1b6bSKenneth E. Jansen } 30310167291SKenneth E. Jansen if(workfc.numpe > 1) { 30410167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 30510167291SKenneth E. Jansen nodetoinsert = fncorp[i]-1; 306175e1b6bSKenneth E. Jansen 307175e1b6bSKenneth E. Jansen //debug 308175e1b6bSKenneth E. Jansen if(workfc.myrank == rankdump) { 309175e1b6bSKenneth E. Jansen printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 310175e1b6bSKenneth E. Jansen } 311175e1b6bSKenneth E. Jansen 31210167291SKenneth E. Jansen // if(nodetoinsert<0) printf("nodetoinsert <0! myrank: %d, value: %ld\n", workfc.myrank, nodetoinsert); 31310167291SKenneth E. Jansen // assert(fncorp[i]>=0); 31410167291SKenneth E. Jansen for (j=1; j<=nflow; j++) { 31510167291SKenneth E. Jansen indexsetary[k] = nodetoinsert*nflow+(j-1); 31610167291SKenneth E. Jansen assert(indexsetary[k]>=0); 31710167291SKenneth E. Jansen // assert(fncorp[i]>=0); 31810167291SKenneth E. Jansen k = k+1; 31910167291SKenneth E. Jansen } 32010167291SKenneth E. Jansen } 32110167291SKenneth E. Jansen } 32210167291SKenneth E. Jansen else { 32310167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 32410167291SKenneth E. Jansen nodetoinsert = i; 32510167291SKenneth E. Jansen for (j=1; j<=nflow; j++) { 32610167291SKenneth E. Jansen indexsetary[k] = nodetoinsert*nflow+(j-1); 32710167291SKenneth E. Jansen k = k+1; 32810167291SKenneth E. Jansen } 32910167291SKenneth E. Jansen } 33010167291SKenneth E. Jansen } 33110167291SKenneth E. Jansen 33210167291SKenneth E. Jansen for (i=0; i<nshg*nflow; i++) { 33310167291SKenneth E. Jansen gblindexsetary[i] = i ; //TODO: better option for performance? 334175e1b6bSKenneth E. Jansen //debug 335175e1b6bSKenneth E. Jansen if(workfc.myrank == rankdump) { 336175e1b6bSKenneth E. Jansen printf("myrank,i,glbindexsetary %d %d %d \n",workfc.myrank,i,gblindexsetary[i]); 337175e1b6bSKenneth E. Jansen } 33810167291SKenneth E. Jansen } 33910167291SKenneth E. Jansen // Create Vector Index Maps 34010167291SKenneth E. Jansen petsc_n = (PetscInt) nshg * (PetscInt) nflow; 34110167291SKenneth E. Jansen //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, indexsetary, 34210167291SKenneth E. Jansen // PETSC_COPY_VALUES, &LocalIndexSet); 34310167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary, 34410167291SKenneth E. Jansen PETSC_COPY_VALUES, &LocalIndexSet); 34510167291SKenneth E. Jansen // printf("after 1st ISCreateGeneral %d\n", ierr); 34610167291SKenneth E. Jansen //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, gblindexsetary, 34710167291SKenneth E. Jansen // PETSC_COPY_VALUES, &GlobalIndexSet); 34810167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetary, 34910167291SKenneth E. Jansen PETSC_COPY_VALUES, &GlobalIndexSet); 35010167291SKenneth E. Jansen // printf("after 2nd ISCreateGeneral %d\n", ierr); 35110167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSet,&VectorMapping); 35210167291SKenneth E. Jansen // printf("after 1st ISLocalToGlobalMappingCreateIS %d\n",ierr); 35310167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSet,&GblVectorMapping); 35410167291SKenneth E. Jansen // printf("after 2nd ISLocalToGlobalMappingCreateIS %d\n",ierr); 3558636783dSKenneth E. Jansen free(indexsetary); 3568636783dSKenneth E. Jansen free(gblindexsetary); 35710167291SKenneth E. Jansen } 35810167291SKenneth E. Jansen if(genpar.lhs == 1) { 35910167291SKenneth E. Jansen get_time((duration), (duration+1)); 36010167291SKenneth E. Jansen ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY); 36110167291SKenneth E. Jansen ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY); 36210167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 36310167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 36410167291SKenneth E. Jansen (duration+1), (duration+3), 36510167291SKenneth E. Jansen "MatAssembly \0"); // char(0)) 36610167291SKenneth E. Jansen get_time(duration, (duration+1)); 36710167291SKenneth E. Jansen } 36810167291SKenneth E. Jansen if(firstpetsccall == 1) { 36910167291SKenneth E. Jansen ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol); 37010167291SKenneth E. Jansen // TODO Should this be LocalRow? LocalCol? or does LocalRow always == LocalCol 37110167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resP); 37210167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP); 37310167291SKenneth E. Jansen //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &DyP); 37410167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP); 37510167291SKenneth E. Jansen } 37610167291SKenneth E. Jansen ierr = VecZeroEntries(resP); 37710167291SKenneth E. Jansen ierr = VecZeroEntries(DyP); 37810167291SKenneth E. Jansen if(firstpetsccall == 1) { 37910167291SKenneth E. Jansen //ierr = VecCreateSeq(PETSC_COMM_SELF, nshg*nflow, &DyPLocal); 38010167291SKenneth E. Jansen ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal); 38110167291SKenneth E. Jansen } 38210167291SKenneth E. Jansen // call VecZeroEntries(DyPLocal, ierr) 38310167291SKenneth E. Jansen 38410167291SKenneth E. Jansen PetscRow=0; 38510167291SKenneth E. Jansen k = 0; 38610167291SKenneth E. Jansen int index; 38710167291SKenneth E. Jansen for (i=0; i<nshg; i++ ){ 38810167291SKenneth E. Jansen for (j = 1; j<=nflow; j++){ 38910167291SKenneth E. Jansen index = i + (j-1)*nshg; 39010167291SKenneth E. Jansen valtoinsert = res[index]; 39110167291SKenneth E. Jansen if(workfc.numpe > 1) { 39210167291SKenneth E. Jansen PetscRow = (fncorp[i]-1)*nflow+(j-1); 39310167291SKenneth E. Jansen } 39410167291SKenneth E. Jansen else { 39510167291SKenneth E. Jansen PetscRow = i*nflow+(j-1); 39610167291SKenneth E. Jansen } 39710167291SKenneth E. Jansen assert(fncorp[i]<=nshgt); 39810167291SKenneth E. Jansen assert(fncorp[i]>0); 39910167291SKenneth E. Jansen assert(PetscRow>=0); 40010167291SKenneth E. Jansen assert(PetscRow<=nshgt*nflow); 40110167291SKenneth E. Jansen ierr = VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES); 40210167291SKenneth E. Jansen } 40310167291SKenneth E. Jansen } 40410167291SKenneth E. Jansen // printf("after VecSetValue loop %d\n",ierr); 40510167291SKenneth E. Jansen ierr = VecAssemblyBegin(resP); 40610167291SKenneth E. Jansen ierr = VecAssemblyEnd(resP); 40710167291SKenneth E. Jansen get_time((duration+2), (duration+3)); 40810167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 40910167291SKenneth E. Jansen (duration+1), (duration+3), 41010167291SKenneth E. Jansen "VectorWorkPre \0"); // char(0)) 41110167291SKenneth E. Jansen 41210167291SKenneth E. Jansen get_time((duration),(duration+1)); 41310167291SKenneth E. Jansen 41410167291SKenneth E. Jansen if(firstpetsccall == 1) { 41510167291SKenneth E. Jansen ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); 41610167291SKenneth E. Jansen ierr = KSPSetOperators(ksp, lhsP, lhsP); 41710167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksp, lhsP, lhsP, SAME_NONZERO_PATTERN); 41810167291SKenneth E. Jansen ierr = KSPGetPC(ksp, &pc); 41910167291SKenneth E. Jansen ierr = PCSetType(pc, PCPBJACOBI); 42010167291SKenneth E. Jansen PetscInt maxits; 42110167291SKenneth E. Jansen maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 42210167291SKenneth E. Jansen //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 42310167291SKenneth E. Jansen ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 42410167291SKenneth E. Jansen ierr = KSPSetFromOptions(ksp); 42510167291SKenneth E. Jansen } 42610167291SKenneth E. Jansen ierr = KSPSolve(ksp, resP, DyP); 42710167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 42810167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 42910167291SKenneth E. Jansen (duration+1), (duration+3), 43010167291SKenneth E. Jansen "KSPSolve \0"); // char(0)) 43110167291SKenneth E. Jansen //if(myrank .eq. 0) then 43210167291SKenneth E. Jansen //!write(*,"(A, E10.3)") "KSPSolve ", elapsed 43310167291SKenneth E. Jansen //end if 43410167291SKenneth E. Jansen get_time((duration),(duration+1)); 43510167291SKenneth E. Jansen if(firstpetsccall == 1) { 43610167291SKenneth E. Jansen ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, GlobalIndexSet, &scatter7); 43710167291SKenneth E. Jansen } 43810167291SKenneth E. Jansen ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 43910167291SKenneth E. Jansen ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD); 44010167291SKenneth E. Jansen ierr = VecGetArray(DyPLocal, &parray); 44110167291SKenneth E. Jansen PetscRow = 0; 44210167291SKenneth E. Jansen for ( node = 0; node< nshg; node++) { 44310167291SKenneth E. Jansen for (var = 1; var<= nflow; var++) { 44410167291SKenneth E. Jansen index = node + (var-1)*nshg; 44510167291SKenneth E. Jansen Dy[index] = parray[PetscRow]; 44610167291SKenneth E. Jansen PetscRow = PetscRow+1; 44710167291SKenneth E. Jansen } 44810167291SKenneth E. Jansen } 44910167291SKenneth E. Jansen ierr = VecRestoreArray(DyPLocal, &parray); 45010167291SKenneth E. Jansen 45110167291SKenneth E. Jansen firstpetsccall = 0; 45210167291SKenneth E. Jansen // later timer ('Solver '); 45310167291SKenneth E. Jansen 4548636783dSKenneth E. Jansen 45510167291SKenneth E. Jansen // .... output the statistics 45610167291SKenneth E. Jansen // 45710167291SKenneth E. Jansen itrpar.iKs=0; // see rstat() 45810167291SKenneth E. Jansen PetscInt its; 45910167291SKenneth E. Jansen //ierr = KSPGetIterationNumber(ksp, &itrpar.iKs); 46010167291SKenneth E. Jansen ierr = KSPGetIterationNumber(ksp, &its); 46110167291SKenneth E. Jansen itrpar.iKs = (int) its; 46210167291SKenneth E. Jansen get_time((duration+2),(duration+3)); 46310167291SKenneth E. Jansen get_max_time_diff((duration), (duration+2), 46410167291SKenneth E. Jansen (duration+1), (duration+3), 46510167291SKenneth E. Jansen "solWork \0"); // char(0)) 46610167291SKenneth E. Jansen itrpar.ntotGM += (int) its; 46710167291SKenneth E. Jansen rstat (res, ilwork); 46810167291SKenneth E. Jansen // 46910167291SKenneth E. Jansen // .... stop the timer 47010167291SKenneth E. Jansen // 47110167291SKenneth E. Jansen // 3002 continue ! no solve just res. 47210167291SKenneth E. Jansen // later call timer ('Back ') 47310167291SKenneth E. Jansen // 47410167291SKenneth E. Jansen // .... end 47510167291SKenneth E. Jansen // 47610167291SKenneth E. Jansen } 47710167291SKenneth E. Jansen 47810167291SKenneth E. Jansen void SolGMRpSclr(double* y, double* ac, 47910167291SKenneth E. Jansen double* x, double* elDw, int* iBC, double* BC, 48010167291SKenneth E. Jansen int* col, int* row, 48110167291SKenneth E. Jansen int* iper, 48210167291SKenneth E. Jansen int* ilwork, double* shp, double* shgl, double* shpb, 48310167291SKenneth E. Jansen double* shglb, double* res, double* Dy, long long int* fncorp) 48410167291SKenneth E. Jansen { 48510167291SKenneth E. Jansen // 48610167291SKenneth E. Jansen // ---------------------------------------------------------------------- 48710167291SKenneth E. Jansen // 48810167291SKenneth E. Jansen // This is the preconditioned GMRES driver routine. 48910167291SKenneth E. Jansen // 49010167291SKenneth E. Jansen // input: 49110167291SKenneth E. Jansen // y (nshg,ndof) : Y-variables at n+alpha_v 49210167291SKenneth E. Jansen // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 49310167291SKenneth E. Jansen // yold (nshg,ndof) : Y-variables at beginning of step 49410167291SKenneth E. Jansen // acold (nshg,ndof) : Primvar. accel. variable at begng step 49510167291SKenneth E. Jansen // x (numnp,nsd) : node coordinates 49610167291SKenneth E. Jansen // iBC (nshg) : BC codes 49710167291SKenneth E. Jansen // BC (nshg,ndofBC) : BC constraint parameters 49810167291SKenneth E. Jansen // shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 49910167291SKenneth E. Jansen // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 50010167291SKenneth E. Jansen // 50110167291SKenneth E. Jansen // ---------------------------------------------------------------------- 50210167291SKenneth E. Jansen // 50310167291SKenneth E. Jansen 50410167291SKenneth E. Jansen 50510167291SKenneth E. Jansen // Get variables from common_c.h 50610167291SKenneth E. Jansen int nshg, nflow, nsd, iownnodes; 50710167291SKenneth E. Jansen nshg = conpar.nshg; 50810167291SKenneth E. Jansen nsd = NSD; 50910167291SKenneth E. Jansen iownnodes = conpar.iownnodes; 51010167291SKenneth E. Jansen 51110167291SKenneth E. Jansen gcorp_t nshgt; 51210167291SKenneth E. Jansen gcorp_t mbeg; 51310167291SKenneth E. Jansen gcorp_t mend; 51410167291SKenneth E. Jansen nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h 51510167291SKenneth E. Jansen mbeg = (gcorp_t) newdim.minowned; 51610167291SKenneth E. Jansen mend = (gcorp_t) newdim.maxowned; 51710167291SKenneth E. Jansen 51810167291SKenneth E. Jansen 51910167291SKenneth E. Jansen int node, element, var, eqn; 52010167291SKenneth E. Jansen double valtoinsert; 52110167291SKenneth E. Jansen int nenl, iel, lelCat, lcsyst, iorder, nshl; 52210167291SKenneth E. Jansen int mattyp, ndofl, nsymdl, npro, ngauss, nppro; 52310167291SKenneth E. Jansen double DyFlats[nshg]; 52410167291SKenneth E. Jansen double DyFlatPhastas[nshg]; 52510167291SKenneth E. Jansen double rmes[nshg]; 52610167291SKenneth E. Jansen // DEBUG 52710167291SKenneth E. Jansen int i,j,k,l,m; 52810167291SKenneth E. Jansen 52910167291SKenneth E. Jansen double real_rtol, real_abstol, real_dtol; 53010167291SKenneth E. Jansen double *parray; 53110167291SKenneth E. Jansen PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA; 53210167291SKenneth E. Jansen PetscInt petsc_n; 53310167291SKenneth E. Jansen PetscOne = 1; 53410167291SKenneth E. Jansen uint64_t duration[4]; 53510167291SKenneth E. Jansen 53610167291SKenneth E. Jansen 53710167291SKenneth E. Jansen // 53810167291SKenneth E. Jansen // 53910167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<****************** 54010167291SKenneth E. Jansen // 54110167291SKenneth E. Jansen // 54210167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations 54310167291SKenneth E. Jansen // 54410167291SKenneth E. Jansen // 54510167291SKenneth E. Jansen genpar.idflx = 0 ; 54610167291SKenneth E. Jansen if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd; 54710167291SKenneth E. Jansen if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd; 54810167291SKenneth E. Jansen 54910167291SKenneth E. Jansen 55010167291SKenneth E. Jansen 55110167291SKenneth E. Jansen gcorp_t glbNZ; 55210167291SKenneth E. Jansen 55310167291SKenneth E. Jansen if(firstpetsccalls == 1) { 554f4d0b58bSKenneth E. Jansen PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 555f4d0b58bSKenneth E. Jansen PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes); 55610167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 55710167291SKenneth E. Jansen idiagnz[i]=0; 55810167291SKenneth E. Jansen iodiagnz[i]=0; 55910167291SKenneth E. Jansen } 56010167291SKenneth E. Jansen i=0; 56110167291SKenneth E. Jansen for(k=0;k<nshg;k++) { 56210167291SKenneth E. Jansen if((fncorp[k] < mbeg) || (fncorp[k] >mend)){ 56310167291SKenneth E. Jansen // this node is not owned by this rank so we skip 56410167291SKenneth E. Jansen } else { 56510167291SKenneth E. Jansen for(j=col[i]-1;j<col[i+1]-1;j++) { 566f4d0b58bSKenneth E. Jansen glbNZ=fncorp[row[j]-1]; 56710167291SKenneth E. Jansen if((glbNZ < mbeg) || (glbNZ > mend)) { 56810167291SKenneth E. Jansen iodiagnz[i]++; 56910167291SKenneth E. Jansen } else { 57010167291SKenneth E. Jansen idiagnz[i]++; 57110167291SKenneth E. Jansen } 57210167291SKenneth E. Jansen } 57310167291SKenneth E. Jansen i++; 57410167291SKenneth E. Jansen } 57510167291SKenneth E. Jansen } 57610167291SKenneth E. Jansen gcorp_t mind=1000; 57710167291SKenneth E. Jansen gcorp_t mino=1000; 57810167291SKenneth E. Jansen gcorp_t maxd=0; 57910167291SKenneth E. Jansen gcorp_t maxo=0; 58010167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 58110167291SKenneth E. Jansen mind=min(mind,idiagnz[i]); 58210167291SKenneth E. Jansen mino=min(mino,iodiagnz[i]); 58310167291SKenneth E. Jansen maxd=max(maxd,idiagnz[i]); 58410167291SKenneth E. Jansen maxo=max(maxo,iodiagnz[i]); 58510167291SKenneth E. Jansen } 58610167291SKenneth E. Jansen 58710167291SKenneth E. Jansen for(i=0;i<iownnodes;i++) { 58810167291SKenneth E. Jansen iodiagnz[i]=1.3*maxd; 58910167291SKenneth E. Jansen idiagnz[i]=1.3*maxd; 59010167291SKenneth E. Jansen } 59110167291SKenneth E. Jansen 59210167291SKenneth E. Jansen 59310167291SKenneth E. Jansen 594f4d0b58bSKenneth E. Jansen // } 595f4d0b58bSKenneth E. Jansen // if(firstpetsccalls == 1) { 59610167291SKenneth E. Jansen 59710167291SKenneth E. Jansen petsc_m = (PetscInt) iownnodes; 59810167291SKenneth E. Jansen petsc_M = (PetscInt) nshgt; 59910167291SKenneth E. Jansen petsc_PA = (PetscInt) 40; 60010167291SKenneth E. Jansen 60110167291SKenneth E. Jansen ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M, 60210167291SKenneth E. Jansen 0, idiagnz, 0, iodiagnz, &lhsPs); 60310167291SKenneth E. Jansen 60410167291SKenneth E. Jansen ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE); 60510167291SKenneth E. Jansen 60610167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call 607175e1b6bSKenneth E. Jansen #ifdef HIDEJEDBROWN 60810167291SKenneth E. Jansen ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE); 609dcce770dSKenneth E. Jansen #endif 61010167291SKenneth E. Jansen ierr = MatSetUp(lhsPs); 61110167291SKenneth E. Jansen 61210167291SKenneth E. Jansen PetscInt myMatStart, myMatEnd; 613175e1b6bSKenneth E. Jansen ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd); 614175e1b6bSKenneth E. Jansen if(workfc.myrank ==rankdump) printf("Sclr myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd); 61510167291SKenneth E. Jansen } 61610167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 61710167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before MatZeroEntries \n"); 61810167291SKenneth E. Jansen if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs); 61910167291SKenneth E. Jansen 62010167291SKenneth E. Jansen get_time(duration, (duration+1)); 62110167291SKenneth E. Jansen 62210167291SKenneth E. Jansen // MPI_Barrier(MPI_COMM_WORLD); 62310167291SKenneth E. Jansen // if(workfc.myrank ==0) printf("Before elmgmr \n"); 62410167291SKenneth E. Jansen // for (i=0; i<nshg ; i++) { 62510167291SKenneth E. Jansen // assert(fncorp[i]>0); 62610167291SKenneth E. Jansen // } 62710167291SKenneth E. Jansen 62810167291SKenneth E. Jansen ElmGMRPETScSclr(y, ac, x, elDw, 62910167291SKenneth E. Jansen shp, shgl, iBC, 63010167291SKenneth E. Jansen BC, shpb, 63110167291SKenneth E. Jansen shglb, res, 63210167291SKenneth E. Jansen rmes, iper, 63310167291SKenneth E. Jansen ilwork, &lhsPs); 63410167291SKenneth E. Jansen if(firstpetsccalls == 1) { 63510167291SKenneth E. Jansen // Setup IndexSet. For now, we mimic vector insertion procedure 63610167291SKenneth E. Jansen // Since we always reference by global indexes this doesn't matter 63710167291SKenneth E. Jansen // except for cache performance) 63810167291SKenneth E. Jansen // TODO: Better arrangment? 6398636783dSKenneth E. Jansen 6408636783dSKenneth E. Jansen PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg); 6418636783dSKenneth E. Jansen PetscInt* gblindexsetarys = malloc(sizeof(PetscInt)*nshg); 6428636783dSKenneth E. Jansen PetscInt nodetoinsert; 6438636783dSKenneth E. Jansen 64410167291SKenneth E. Jansen nodetoinsert = 0; 64510167291SKenneth E. Jansen k=0; 646175e1b6bSKenneth E. Jansen 647175e1b6bSKenneth E. Jansen //debug 648175e1b6bSKenneth E. Jansen if(workfc.myrank == rankdump) { 649175e1b6bSKenneth E. Jansen printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg); 650175e1b6bSKenneth E. Jansen printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend); 651175e1b6bSKenneth E. Jansen } 652175e1b6bSKenneth E. Jansen 65310167291SKenneth E. Jansen if(workfc.numpe > 1) { 65410167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 65510167291SKenneth E. Jansen nodetoinsert = fncorp[i]-1; 656175e1b6bSKenneth E. Jansen //debug 657175e1b6bSKenneth E. Jansen if(workfc.myrank == rankdump) { 658175e1b6bSKenneth E. Jansen printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert); 659175e1b6bSKenneth E. Jansen } 660175e1b6bSKenneth E. Jansen 66110167291SKenneth E. Jansen indexsetarys[k] = nodetoinsert; 66210167291SKenneth E. Jansen k = k+1; 66310167291SKenneth E. Jansen } 66410167291SKenneth E. Jansen } 66510167291SKenneth E. Jansen else { 66610167291SKenneth E. Jansen for (i=0; i<nshg ; i++) { 66710167291SKenneth E. Jansen nodetoinsert = i; 66810167291SKenneth E. Jansen indexsetarys[k] = nodetoinsert; 66910167291SKenneth E. Jansen k = k+1; 67010167291SKenneth E. Jansen } 67110167291SKenneth E. Jansen } 67210167291SKenneth E. Jansen 67310167291SKenneth E. Jansen for (i=0; i<nshg; i++) { 67410167291SKenneth E. Jansen gblindexsetarys[i] = i ; //TODO: better option for performance? 675175e1b6bSKenneth E. Jansen //debug 676175e1b6bSKenneth E. Jansen if(workfc.myrank == rankdump) { 677175e1b6bSKenneth E. Jansen printf("myrank,i,glbindexsetarys %d %d %d \n",workfc.myrank,i,gblindexsetarys[i]); 678175e1b6bSKenneth E. Jansen } 67910167291SKenneth E. Jansen } 68010167291SKenneth E. Jansen // Create Vector Index Maps 68110167291SKenneth E. Jansen petsc_n = (PetscInt) nshg; 68210167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys, 68310167291SKenneth E. Jansen PETSC_COPY_VALUES, &LocalIndexSets); 68410167291SKenneth E. Jansen ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetarys, 68510167291SKenneth E. Jansen PETSC_COPY_VALUES, &GlobalIndexSets); 68610167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSets,&VectorMappings); 68710167291SKenneth E. Jansen ierr = ISLocalToGlobalMappingCreateIS(GlobalIndexSets,&GblVectorMappings); 6888636783dSKenneth E. Jansen free(indexsetarys); 6898636783dSKenneth E. Jansen free(gblindexsetarys); 69010167291SKenneth E. Jansen } 69110167291SKenneth E. Jansen if(genpar.lhs ==1) { 69210167291SKenneth E. Jansen ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY); 69310167291SKenneth E. Jansen ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY); 69410167291SKenneth E. Jansen } 69510167291SKenneth E. Jansen if(firstpetsccalls == 1) { 69610167291SKenneth E. Jansen ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol); 69710167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs); 69810167291SKenneth E. Jansen ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs); 69910167291SKenneth E. Jansen } 70010167291SKenneth E. Jansen ierr = VecZeroEntries(resPs); 70110167291SKenneth E. Jansen ierr = VecZeroEntries(DyPs); 70210167291SKenneth E. Jansen if(firstpetsccalls == 1) { 70310167291SKenneth E. Jansen ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals); 70410167291SKenneth E. Jansen } 70510167291SKenneth E. Jansen 70610167291SKenneth E. Jansen PetscRow=0; 70710167291SKenneth E. Jansen k = 0; 70810167291SKenneth E. Jansen int index; 70910167291SKenneth E. Jansen for (i=0; i<nshg; i++ ){ 71010167291SKenneth E. Jansen valtoinsert = res[i]; 71110167291SKenneth E. Jansen if(workfc.numpe > 1) { 71210167291SKenneth E. Jansen PetscRow = (fncorp[i]-1); 71310167291SKenneth E. Jansen } 71410167291SKenneth E. Jansen else { 71510167291SKenneth E. Jansen PetscRow = i-1; 71610167291SKenneth E. Jansen } 71710167291SKenneth E. Jansen assert(fncorp[i]<=nshgt); 71810167291SKenneth E. Jansen assert(fncorp[i]>0); 71910167291SKenneth E. Jansen assert(PetscRow>=0); 72010167291SKenneth E. Jansen assert(PetscRow<=nshgt); 72110167291SKenneth E. Jansen ierr = VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES); 72210167291SKenneth E. Jansen } 72310167291SKenneth E. Jansen // printf("after VecSetValue loop %d\n",ierr); 72410167291SKenneth E. Jansen ierr = VecAssemblyBegin(resPs); 72510167291SKenneth E. Jansen ierr = VecAssemblyEnd(resPs); 72610167291SKenneth E. Jansen 72710167291SKenneth E. Jansen if(firstpetsccalls == 1) { 72810167291SKenneth E. Jansen ierr = KSPCreate(PETSC_COMM_WORLD, &ksps); 72910167291SKenneth E. Jansen ierr = KSPSetOperators(ksps, lhsPs, lhsPs); 73010167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN); 73110167291SKenneth E. Jansen ierr = KSPGetPC(ksps, &pcs); 73210167291SKenneth E. Jansen ierr = PCSetType(pcs, PCPBJACOBI); 73310167291SKenneth E. Jansen PetscInt maxits; 73410167291SKenneth E. Jansen maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace; 73510167291SKenneth E. Jansen //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace); 73610167291SKenneth E. Jansen ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits); 73710167291SKenneth E. Jansen ierr = KSPSetFromOptions(ksps); 73810167291SKenneth E. Jansen } 73910167291SKenneth E. Jansen ierr = KSPSolve(ksps, resPs, DyPs); 74010167291SKenneth E. Jansen if(firstpetsccalls == 1) { 74110167291SKenneth E. Jansen ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, GlobalIndexSets, &scatter7s); 74210167291SKenneth E. Jansen } 74310167291SKenneth E. Jansen ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 74410167291SKenneth E. Jansen ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD); 74510167291SKenneth E. Jansen ierr = VecGetArray(DyPLocals, &parray); 74610167291SKenneth E. Jansen PetscRow = 0; 74710167291SKenneth E. Jansen for ( node = 0; node< nshg; node++) { 74810167291SKenneth E. Jansen index = node; 74910167291SKenneth E. Jansen Dy[index] = parray[PetscRow]; 75010167291SKenneth E. Jansen PetscRow = PetscRow+1; 75110167291SKenneth E. Jansen } 75210167291SKenneth E. Jansen ierr = VecRestoreArray(DyPLocals, &parray); 75310167291SKenneth E. Jansen 75410167291SKenneth E. Jansen firstpetsccalls = 0; 75510167291SKenneth E. Jansen 75610167291SKenneth E. Jansen // .... output the statistics 75710167291SKenneth E. Jansen // 75810167291SKenneth E. Jansen // itrpar.iKss=0; // see rstat() 75910167291SKenneth E. Jansen PetscInt its; 76010167291SKenneth E. Jansen ierr = KSPGetIterationNumber(ksps, &its); 76110167291SKenneth E. Jansen itrpar.iKss = (int) its; 76210167291SKenneth E. Jansen itrpar.ntotGMs += (int) its; 76310167291SKenneth E. Jansen rstatSclr (res, ilwork); 76410167291SKenneth E. Jansen // 76510167291SKenneth E. Jansen // .... end 76610167291SKenneth E. Jansen // 76710167291SKenneth E. Jansen } 76817860365SKenneth E. Jansen #endif 769