xref: /phasta/phSolver/compressible/solgmrpetsc.c (revision 79f1763e38fe03466335328e9c0a1a0a45544a96)
117860365SKenneth E. Jansen #ifdef HAVE_PETSC
210167291SKenneth E. Jansen #include <stdio.h>
310167291SKenneth E. Jansen 
410167291SKenneth E. Jansen #include "petscsys.h"
510167291SKenneth E. Jansen #include "petscvec.h"
610167291SKenneth E. Jansen #include "petscmat.h"
710167291SKenneth E. Jansen #include "petscpc.h"
810167291SKenneth E. Jansen #include "petscksp.h"
910167291SKenneth E. Jansen 
1010167291SKenneth E. Jansen #include "assert.h"
1110167291SKenneth E. Jansen #include "common_c.h"
1210167291SKenneth E. Jansen 
1310167291SKenneth E. Jansen #include <FCMangle.h>
1410167291SKenneth E. Jansen #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP)
1510167291SKenneth E. Jansen #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR)
1610167291SKenneth E. Jansen #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC)
1710167291SKenneth E. Jansen #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR)
1810167291SKenneth E. Jansen #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT)
1910167291SKenneth E. Jansen #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR)
2010167291SKenneth E. Jansen #define min(a,b) (((a) < (b)) ? (a) : (b))
2110167291SKenneth E. Jansen #define max(a,b) (((a) > (b)) ? (a) : (b))
2210167291SKenneth E. Jansen #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME)
2310167291SKenneth E. Jansen #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF)
2410167291SKenneth E. Jansen 
2510167291SKenneth E. Jansen typedef long long int gcorp_t;
2610167291SKenneth E. Jansen 
2710167291SKenneth E. Jansen void get_time(uint64_t* rv, uint64_t* cycle);
2810167291SKenneth E. Jansen void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl);
2910167291SKenneth E. Jansen 
3010167291SKenneth E. Jansen //#include "auxmpi.h"
3110167291SKenneth E. Jansen //      static PetscOffset poff;
3210167291SKenneth E. Jansen 
3310167291SKenneth E. Jansen       static Mat lhsP;
3410167291SKenneth E. Jansen       static PC pc;
3510167291SKenneth E. Jansen       static KSP ksp;
3610167291SKenneth E. Jansen       static Vec DyP, resP, DyPLocal, resPGlobal;
3710167291SKenneth E. Jansen       static PetscErrorCode ierr;
3810167291SKenneth E. Jansen       static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol;
3910167291SKenneth E. Jansen       static IS LocalIndexSet, GlobalIndexSet;
4010167291SKenneth E. Jansen       static ISLocalToGlobalMapping VectorMapping;
4110167291SKenneth E. Jansen       static ISLocalToGlobalMapping GblVectorMapping;
4210167291SKenneth E. Jansen       static  VecScatter scatter7;
4310167291SKenneth E. Jansen       static int firstpetsccall = 1;
4410167291SKenneth E. Jansen 
4510167291SKenneth E. Jansen       static Mat lhsPs;
4610167291SKenneth E. Jansen       static PC pcs;
4710167291SKenneth E. Jansen       static KSP ksps;
4810167291SKenneth E. Jansen       static Vec DyPs, resPs, DyPLocals, resPGlobals;
4910167291SKenneth E. Jansen       static IS LocalIndexSets, GlobalIndexSets;
5010167291SKenneth E. Jansen       static ISLocalToGlobalMapping VectorMappings;
5110167291SKenneth E. Jansen       static ISLocalToGlobalMapping GblVectorMappings;
5210167291SKenneth E. Jansen       static VecScatter scatter7s;
5310167291SKenneth E. Jansen       static int firstpetsccalls = 1;
5410167291SKenneth E. Jansen 
5510167291SKenneth E. Jansen void     SolGMRp(double* y,         double* ac,        double* yold,
5610167291SKenneth E. Jansen      	double* x,         int* iBC,       double* BC,
5710167291SKenneth E. Jansen      	int* col,       int* row,       double* lhsk,
5810167291SKenneth E. Jansen      	double* res,       double* BDiag,
5910167291SKenneth E. Jansen      	int* iper,
6010167291SKenneth E. Jansen      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
6110167291SKenneth E. Jansen      	double* shglb,     double* Dy,        double* rerr, long long int* fncorp)
6210167291SKenneth E. Jansen {
6310167291SKenneth E. Jansen //
6410167291SKenneth E. Jansen // ----------------------------------------------------------------------
6510167291SKenneth E. Jansen //
6610167291SKenneth E. Jansen //  This is the preconditioned GMRES driver routine.
6710167291SKenneth E. Jansen //
6810167291SKenneth E. Jansen // input:
6910167291SKenneth E. Jansen //  y      (nshg,ndof)           : Y-variables at n+alpha_v
7010167291SKenneth E. Jansen //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
7110167291SKenneth E. Jansen //  yold   (nshg,ndof)           : Y-variables at beginning of step
7210167291SKenneth E. Jansen //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
7310167291SKenneth E. Jansen //  x      (numnp,nsd)            : node coordinates
7410167291SKenneth E. Jansen //  iBC    (nshg)                : BC codes
7510167291SKenneth E. Jansen //  BC     (nshg,ndofBC)         : BC constraint parameters
7610167291SKenneth E. Jansen //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
7710167291SKenneth E. Jansen //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
7810167291SKenneth E. Jansen //
7910167291SKenneth E. Jansen // output:
8010167291SKenneth E. Jansen //  res    (nshg,nflow)           : preconditioned residual
8110167291SKenneth E. Jansen //  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
8210167291SKenneth E. Jansen //
8310167291SKenneth E. Jansen //
8410167291SKenneth E. Jansen // Zdenek Johan,  Winter 1991.  (Fortran 90)
8510167291SKenneth E. Jansen // ----------------------------------------------------------------------
8610167291SKenneth E. Jansen //
8710167291SKenneth E. Jansen 
8810167291SKenneth E. Jansen 
8910167291SKenneth E. Jansen // Get variables from common_c.h
9010167291SKenneth E. Jansen       int nshg, nflow, nsd, iownnodes;
9110167291SKenneth E. Jansen       nshg  = conpar.nshg;
9210167291SKenneth E. Jansen       nflow = conpar.nflow;
9310167291SKenneth E. Jansen       nsd = NSD;
9410167291SKenneth E. Jansen       iownnodes = conpar.iownnodes;
9510167291SKenneth E. Jansen 
9610167291SKenneth E. Jansen       gcorp_t nshgt;
9710167291SKenneth E. Jansen       gcorp_t mbeg;
9810167291SKenneth E. Jansen       gcorp_t mend;
9910167291SKenneth E. Jansen       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
10010167291SKenneth E. Jansen       mbeg = (gcorp_t) newdim.minowned;
10110167291SKenneth E. Jansen       mend = (gcorp_t) newdim.maxowned;
10210167291SKenneth E. Jansen 
10310167291SKenneth E. Jansen 
10410167291SKenneth E. Jansen       int node, element, var, eqn;
10510167291SKenneth E. Jansen       double valtoinsert;
10610167291SKenneth E. Jansen       int nenl, iel, lelCat, lcsyst, iorder, nshl;
10710167291SKenneth E. Jansen       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
10810167291SKenneth E. Jansen       double DyFlat[nshg*nflow];
10910167291SKenneth E. Jansen       double DyFlatPhasta[nshg*nflow];
11010167291SKenneth E. Jansen       double rmes[nshg*nflow];
11110167291SKenneth E. Jansen // DEBUG
11210167291SKenneth E. Jansen       int i,j,k,l,m;
11310167291SKenneth E. Jansen       //int indexsetary[nflow*nshg];
11410167291SKenneth E. Jansen       //int gblindexsetary[nflow*nshg];
11510167291SKenneth E. Jansen       PetscInt indexsetary[nflow*nshg];
11610167291SKenneth E. Jansen       PetscInt gblindexsetary[nflow*nshg];
11710167291SKenneth E. Jansen       PetscInt nodetoinsert;
11810167291SKenneth E. Jansen 
11910167291SKenneth E. Jansen       // FIXME: PetscScalar
12010167291SKenneth E. Jansen       double  real_rtol, real_abstol, real_dtol;
12110167291SKenneth E. Jansen // /DEBUG
12210167291SKenneth E. Jansen       //double parray[1]; // Should be a PetscScalar
12310167291SKenneth E. Jansen       double *parray; // Should be a PetscScalar
12410167291SKenneth E. Jansen       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
12510167291SKenneth E. Jansen       PetscInt petsc_n;
12610167291SKenneth E. Jansen       PetscOne = 1;
12710167291SKenneth E. Jansen       uint64_t duration[4];
12810167291SKenneth E. Jansen 
12910167291SKenneth E. Jansen //      printf("%g %g %g \n", *rerr, *(rerr+1) , *(rerr+2));
13010167291SKenneth E. Jansen 
13110167291SKenneth E. Jansen        if(firstpetsccall == 1) {
13210167291SKenneth E. Jansen /*         call get_time(duration(1),duration(2));
13310167291SKenneth E. Jansen          call system('sleep 10');
13410167291SKenneth E. Jansen          call get_time(duration(3),duration(4));
13510167291SKenneth E. Jansen          call get_max_time_diff(duration(1), duration(3),
13610167291SKenneth E. Jansen                               duration(2), duration(4),
13710167291SKenneth E. Jansen                               "TenSecondSleep " // char(0))
13810167291SKenneth E. Jansen          if(myrank .eq. 0) {
13910167291SKenneth E. Jansen            !write(*,"(A, E10.3)") "TenSecondSleep ", elapsed
14010167291SKenneth E. Jansen          }
14110167291SKenneth E. Jansen */
14210167291SKenneth E. Jansen        }
14310167291SKenneth E. Jansen //
14410167291SKenneth E. Jansen //
14510167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<******************
14610167291SKenneth E. Jansen //
14710167291SKenneth E. Jansen //
14810167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations
14910167291SKenneth E. Jansen //
15010167291SKenneth E. Jansen //
15110167291SKenneth E. Jansen       genpar.idflx = 0 ;
15210167291SKenneth E. Jansen       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
15310167291SKenneth E. Jansen       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
15410167291SKenneth E. Jansen 
15510167291SKenneth E. Jansen 
15610167291SKenneth E. Jansen 
15710167291SKenneth E. Jansen       PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
15810167291SKenneth E. Jansen       PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
15910167291SKenneth E. Jansen       gcorp_t glbNZ;
16010167291SKenneth E. Jansen 
16110167291SKenneth E. Jansen       if(firstpetsccall == 1) {
16210167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
16310167291SKenneth E. Jansen              idiagnz[i]=0;
16410167291SKenneth E. Jansen              iodiagnz[i]=0;
16510167291SKenneth E. Jansen         }
16610167291SKenneth E. Jansen         i=0;
16710167291SKenneth E. Jansen         for(k=0;k<nshg;k++) {
16810167291SKenneth E. Jansen           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
16910167291SKenneth E. Jansen // this node is not owned by this rank so we skip
17010167291SKenneth E. Jansen           } else {
17110167291SKenneth E. Jansen            for(j=col[i]-1;j<col[i+1]-1;j++) {
172*79f1763eSKenneth E. Jansen //          assert(row[j]<=nshg);
173*79f1763eSKenneth E. Jansen //          assert(fncorp[row[j]-1]<=nshgt);
174*79f1763eSKenneth E. Jansen              glbNZ=fncorp[row[j]-1];
17510167291SKenneth E. Jansen              if((glbNZ < mbeg) || (glbNZ > mend)) {
17610167291SKenneth E. Jansen                 iodiagnz[i]++;
17710167291SKenneth E. Jansen              } else {
17810167291SKenneth E. Jansen                 idiagnz[i]++;
17910167291SKenneth E. Jansen              }
18010167291SKenneth E. Jansen            }
18110167291SKenneth E. Jansen            i++;
18210167291SKenneth E. Jansen           }
18310167291SKenneth E. Jansen         }
18410167291SKenneth E. Jansen         gcorp_t mind=1000;
18510167291SKenneth E. Jansen         gcorp_t mino=1000;
18610167291SKenneth E. Jansen         gcorp_t maxd=0;
18710167291SKenneth E. Jansen         gcorp_t maxo=0;
18810167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
18910167291SKenneth E. Jansen            mind=min(mind,idiagnz[i]);
19010167291SKenneth E. Jansen            mino=min(mino,iodiagnz[i]);
19110167291SKenneth E. Jansen            maxd=max(maxd,idiagnz[i]);
19210167291SKenneth E. Jansen            maxo=max(maxo,iodiagnz[i]);
19310167291SKenneth E. Jansen //           iodiagnz[i]=max(iodiagnz[i],10);
19410167291SKenneth E. Jansen //           idiagnz[i]=max(idiagnz[i],10);
19510167291SKenneth E. Jansen //           iodiagnz[i]=2*iodiagnz[i];   //  estimate a bit higher for off-part interactions
19610167291SKenneth E. Jansen //           idiagnz[i]=2*idiagnz[i];   //  estimate a bit higher for off-part interactions
19710167291SKenneth E. Jansen         }
19810167291SKenneth E. Jansen // the above was pretty good but below is faster and not too much more memory...of course once you do this
19910167291SKenneth E. Jansen // could just use the constant fill parameters in create but keep it alive for potential later optimization
20010167291SKenneth E. Jansen 
20110167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
20210167291SKenneth E. Jansen            iodiagnz[i]=1.3*maxd;
20310167291SKenneth E. Jansen            idiagnz[i]=1.3*maxd;
20410167291SKenneth E. Jansen         }
20510167291SKenneth E. Jansen 
20610167291SKenneth E. Jansen 
20710167291SKenneth E. Jansen 
20810167291SKenneth E. Jansen         if(workfc.numpe < 200){
20910167291SKenneth E. Jansen           printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
21010167291SKenneth E. Jansen           printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo);
21110167291SKenneth E. Jansen         }
21210167291SKenneth E. Jansen         // Print debug info
21310167291SKenneth E. Jansen         if(nshgt < 200){
21410167291SKenneth E. Jansen         int irank;
21510167291SKenneth E. Jansen         for(irank=0;irank<workfc.numpe;irank++) {
21610167291SKenneth E. Jansen           if(irank == workfc.myrank){
21710167291SKenneth E. Jansen             printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank);
21810167291SKenneth E. Jansen             for(i=0;i<iownnodes;i++) {
21910167291SKenneth E. Jansen               printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]);
22010167291SKenneth E. Jansen             }
22110167291SKenneth E. Jansen           }
22210167291SKenneth E. Jansen          MPI_Barrier(MPI_COMM_WORLD);
22310167291SKenneth E. Jansen         }
22410167291SKenneth E. Jansen         }
22510167291SKenneth E. Jansen        }
22610167291SKenneth E. Jansen        if(firstpetsccall == 1) {
22710167291SKenneth E. Jansen 
22810167291SKenneth E. Jansen         petsc_bs = (PetscInt) nflow;
22910167291SKenneth E. Jansen         petsc_m  = (PetscInt) nflow* (PetscInt) iownnodes;
23010167291SKenneth E. Jansen         petsc_M  = (PetscInt) nshgt * (PetscInt) nflow;
23110167291SKenneth E. Jansen         petsc_PA  = (PetscInt) 40;
23210167291SKenneth E. Jansen         // TODO: fixe nshgt for 64 bit integer!!!!!
23310167291SKenneth E. Jansen 
23410167291SKenneth E. Jansen         //ierr = MatCreateBAIJ(PETSC_COMM_WORLD, nflow, nflow*iownnodes,
23510167291SKenneth E. Jansen         //       nflow*iownnodes, nshgt*nflow, nshgt*nflow, 0,
23610167291SKenneth E. Jansen 
23710167291SKenneth E. Jansen // this has no pre-allocate        ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M,
23810167291SKenneth E. Jansen // this has no pre-allocate                             0, NULL, 0, NULL, &lhsP);
23910167291SKenneth E. Jansen 
24010167291SKenneth E. Jansen // next 2 do pre-allocate
24110167291SKenneth E. Jansen 
24210167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
24310167291SKenneth E. Jansen //      if(workfc.myrank ==0) printf("Before matCreateBAIJ petsc_bs,petsc_m,petsc_M,petsc_PA %ld %ld %ld %ld \n",petsc_bs,petsc_m,petsc_M,petsc_PA);
24410167291SKenneth E. Jansen 
24510167291SKenneth E. Jansen         ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M,
24610167291SKenneth E. Jansen                              0, idiagnz, 0, iodiagnz, &lhsP);
24710167291SKenneth E. Jansen 
24810167291SKenneth E. Jansen //                              petsc_PA, NULL, petsc_PA, NULL, &lhsP);
24910167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
25010167291SKenneth E. Jansen //      if(workfc.myrank ==0) printf("Before matSetOoption 1  \n");
25110167291SKenneth E. Jansen         ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
25210167291SKenneth E. Jansen 
25310167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
254dcce770dSKenneth E. Jansen #ifdef JEDBROWN
25510167291SKenneth E. Jansen         ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
256dcce770dSKenneth E. Jansen #endif
25710167291SKenneth E. Jansen         ierr = MatSetUp(lhsP);
25810167291SKenneth E. Jansen 
25910167291SKenneth E. Jansen       PetscInt myMatStart, myMatEnd;
26010167291SKenneth E. Jansen       ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
26110167291SKenneth E. Jansen       }
26210167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
26310167291SKenneth E. Jansen  //     if(workfc.myrank ==0) printf("Before MatZeroEntries  \n");
26410167291SKenneth E. Jansen       if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP);
26510167291SKenneth E. Jansen 
26610167291SKenneth E. Jansen       get_time(duration, (duration+1));
26710167291SKenneth E. Jansen 
26810167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
26910167291SKenneth E. Jansen  //     if(workfc.myrank ==0) printf("Before elmgmr  \n");
27010167291SKenneth E. Jansen //      for (i=0; i<nshg ; i++) {
27110167291SKenneth E. Jansen //            assert(fncorp[i]>0);
27210167291SKenneth E. Jansen //      }
27310167291SKenneth E. Jansen 
27410167291SKenneth E. Jansen       ElmGMRPETSc(y,             ac,            x,
27510167291SKenneth E. Jansen                   shp,           shgl,          iBC,
27610167291SKenneth E. Jansen                   BC,            shpb,
27710167291SKenneth E. Jansen                   shglb,         res,
27810167291SKenneth E. Jansen                   rmes,                   iper,
27910167291SKenneth E. Jansen                   ilwork,        rerr ,         &lhsP);
28010167291SKenneth E. Jansen       get_time((duration+2), (duration+3));
28110167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
28210167291SKenneth E. Jansen                         (duration+1), (duration+3),
28310167291SKenneth E. Jansen                         "ElmGMRPETSc \0");  // char(0))
28410167291SKenneth E. Jansen //       printf("after ElmGMRPETSc\n");
28510167291SKenneth E. Jansen        if(firstpetsccall == 1) {
28610167291SKenneth E. Jansen       // Setup IndexSet. For now, we mimic vector insertion procedure
28710167291SKenneth E. Jansen       // Since we always reference by global indexes this doesn't matter
28810167291SKenneth E. Jansen       // except for cache performance)
28910167291SKenneth E. Jansen       // TODO: Better arrangment?
29010167291SKenneth E. Jansen         nodetoinsert = 0;
29110167291SKenneth E. Jansen         k=0;
29210167291SKenneth E. Jansen         if(workfc.numpe > 1) {
29310167291SKenneth E. Jansen           for (i=0; i<nshg ; i++) {
29410167291SKenneth E. Jansen             nodetoinsert = fncorp[i]-1;
29510167291SKenneth E. Jansen //            if(nodetoinsert<0) printf("nodetoinsert <0! myrank: %d, value: %ld\n", workfc.myrank, nodetoinsert);
29610167291SKenneth E. Jansen //            assert(fncorp[i]>=0);
29710167291SKenneth E. Jansen             for (j=1; j<=nflow; j++) {
29810167291SKenneth E. Jansen               indexsetary[k] = nodetoinsert*nflow+(j-1);
29910167291SKenneth E. Jansen               assert(indexsetary[k]>=0);
30010167291SKenneth E. Jansen //              assert(fncorp[i]>=0);
30110167291SKenneth E. Jansen               k = k+1;
30210167291SKenneth E. Jansen             }
30310167291SKenneth E. Jansen           }
30410167291SKenneth E. Jansen         }
30510167291SKenneth E. Jansen         else {
30610167291SKenneth E. Jansen           for (i=0; i<nshg ; i++) {
30710167291SKenneth E. Jansen             nodetoinsert = i;
30810167291SKenneth E. Jansen             for (j=1; j<=nflow; j++) {
30910167291SKenneth E. Jansen               indexsetary[k] = nodetoinsert*nflow+(j-1);
31010167291SKenneth E. Jansen               k = k+1;
31110167291SKenneth E. Jansen             }
31210167291SKenneth E. Jansen           }
31310167291SKenneth E. Jansen         }
31410167291SKenneth E. Jansen 
31510167291SKenneth E. Jansen         for (i=0; i<nshg*nflow; i++) {
31610167291SKenneth E. Jansen           gblindexsetary[i] = i ; //TODO: better option for performance?
31710167291SKenneth E. Jansen         }
31810167291SKenneth E. Jansen //  Create Vector Index Maps
31910167291SKenneth E. Jansen         petsc_n  = (PetscInt) nshg * (PetscInt) nflow;
32010167291SKenneth E. Jansen         //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, indexsetary,
32110167291SKenneth E. Jansen      	//       PETSC_COPY_VALUES, &LocalIndexSet);
32210167291SKenneth E. Jansen         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary,
32310167291SKenneth E. Jansen      	       PETSC_COPY_VALUES, &LocalIndexSet);
32410167291SKenneth E. Jansen //        printf("after 1st ISCreateGeneral %d\n", ierr);
32510167291SKenneth E. Jansen         //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, gblindexsetary,
32610167291SKenneth E. Jansen         //       PETSC_COPY_VALUES, &GlobalIndexSet);
32710167291SKenneth E. Jansen         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetary,
32810167291SKenneth E. Jansen                PETSC_COPY_VALUES, &GlobalIndexSet);
32910167291SKenneth E. Jansen //        printf("after 2nd ISCreateGeneral %d\n", ierr);
33010167291SKenneth E. Jansen         ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSet,&VectorMapping);
33110167291SKenneth E. Jansen //        printf("after 1st ISLocalToGlobalMappingCreateIS %d\n",ierr);
33210167291SKenneth E. Jansen         ierr =  ISLocalToGlobalMappingCreateIS(GlobalIndexSet,&GblVectorMapping);
33310167291SKenneth E. Jansen //        printf("after 2nd ISLocalToGlobalMappingCreateIS %d\n",ierr);
33410167291SKenneth E. Jansen       }
33510167291SKenneth E. Jansen       if(genpar.lhs == 1) {
33610167291SKenneth E. Jansen       get_time((duration), (duration+1));
33710167291SKenneth E. Jansen       ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY);
33810167291SKenneth E. Jansen       ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY);
33910167291SKenneth E. Jansen       get_time((duration+2),(duration+3));
34010167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
34110167291SKenneth E. Jansen                         (duration+1), (duration+3),
34210167291SKenneth E. Jansen                         "MatAssembly \0"); // char(0))
34310167291SKenneth E. Jansen       get_time(duration, (duration+1));
34410167291SKenneth E. Jansen       }
34510167291SKenneth E. Jansen       if(firstpetsccall == 1) {
34610167291SKenneth E. Jansen         ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol);
34710167291SKenneth E. Jansen //  TODO Should this be LocalRow? LocalCol? or does LocalRow always == LocalCol
34810167291SKenneth E. Jansen         //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resP);
34910167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP);
35010167291SKenneth E. Jansen         //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &DyP);
35110167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP);
35210167291SKenneth E. Jansen       }
35310167291SKenneth E. Jansen       ierr = VecZeroEntries(resP);
35410167291SKenneth E. Jansen       ierr = VecZeroEntries(DyP);
35510167291SKenneth E. Jansen       if(firstpetsccall == 1) {
35610167291SKenneth E. Jansen         //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resPGlobal);
35710167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobal);
35810167291SKenneth E. Jansen         //ierr = VecCreateSeq(PETSC_COMM_SELF, nshg*nflow, &DyPLocal);
35910167291SKenneth E. Jansen         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal);
36010167291SKenneth E. Jansen       }
36110167291SKenneth E. Jansen         ierr = VecZeroEntries(resPGlobal);
36210167291SKenneth E. Jansen //         call VecZeroEntries(DyPLocal, ierr)
36310167291SKenneth E. Jansen 
36410167291SKenneth E. Jansen       PetscRow=0;
36510167291SKenneth E. Jansen       k = 0;
36610167291SKenneth E. Jansen       int index;
36710167291SKenneth E. Jansen       for (i=0; i<nshg; i++ ){
36810167291SKenneth E. Jansen         for (j = 1; j<=nflow; j++){
36910167291SKenneth E. Jansen           index = i + (j-1)*nshg;
37010167291SKenneth E. Jansen           valtoinsert = res[index];
37110167291SKenneth E. Jansen           if(workfc.numpe > 1) {
37210167291SKenneth E. Jansen             PetscRow = (fncorp[i]-1)*nflow+(j-1);
37310167291SKenneth E. Jansen           }
37410167291SKenneth E. Jansen           else {
37510167291SKenneth E. Jansen             PetscRow = i*nflow+(j-1);
37610167291SKenneth E. Jansen           }
37710167291SKenneth E. Jansen           assert(fncorp[i]<=nshgt);
37810167291SKenneth E. Jansen           assert(fncorp[i]>0);
37910167291SKenneth E. Jansen           assert(PetscRow>=0);
38010167291SKenneth E. Jansen           assert(PetscRow<=nshgt*nflow);
38110167291SKenneth E. Jansen           ierr =  VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES);
38210167291SKenneth E. Jansen         }
38310167291SKenneth E. Jansen       }
38410167291SKenneth E. Jansen //      printf("after VecSetValue loop %d\n",ierr);
38510167291SKenneth E. Jansen       ierr = VecAssemblyBegin(resP);
38610167291SKenneth E. Jansen       ierr = VecAssemblyEnd(resP);
38710167291SKenneth E. Jansen       get_time((duration+2), (duration+3));
38810167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
38910167291SKenneth E. Jansen                         (duration+1), (duration+3),
39010167291SKenneth E. Jansen                         "VectorWorkPre \0"); // char(0))
39110167291SKenneth E. Jansen 
39210167291SKenneth E. Jansen       get_time((duration),(duration+1));
39310167291SKenneth E. Jansen 
39410167291SKenneth E. Jansen       if(firstpetsccall == 1) {
39510167291SKenneth E. Jansen         ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
39610167291SKenneth E. Jansen         ierr = KSPSetOperators(ksp, lhsP, lhsP);
39710167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksp, lhsP, lhsP, SAME_NONZERO_PATTERN);
39810167291SKenneth E. Jansen         ierr = KSPGetPC(ksp, &pc);
39910167291SKenneth E. Jansen         ierr = PCSetType(pc, PCPBJACOBI);
40010167291SKenneth E. Jansen         PetscInt maxits;
40110167291SKenneth E. Jansen         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
40210167291SKenneth E. Jansen         //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace);
40310167291SKenneth E. Jansen         ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
40410167291SKenneth E. Jansen         ierr = KSPSetFromOptions(ksp);
40510167291SKenneth E. Jansen       }
40610167291SKenneth E. Jansen       ierr = KSPSolve(ksp, resP, DyP);
40710167291SKenneth E. Jansen       get_time((duration+2),(duration+3));
40810167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
40910167291SKenneth E. Jansen                         (duration+1), (duration+3),
41010167291SKenneth E. Jansen                         "KSPSolve \0"); // char(0))
41110167291SKenneth E. Jansen      //if(myrank .eq. 0) then
41210167291SKenneth E. Jansen      //!write(*,"(A, E10.3)") "KSPSolve ", elapsed
41310167291SKenneth E. Jansen      //end if
41410167291SKenneth E. Jansen       get_time((duration),(duration+1));
41510167291SKenneth E. Jansen       if(firstpetsccall == 1) {
41610167291SKenneth E. Jansen       ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, GlobalIndexSet, &scatter7);
41710167291SKenneth E. Jansen       }
41810167291SKenneth E. Jansen       ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
41910167291SKenneth E. Jansen       ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
42010167291SKenneth E. Jansen       ierr = VecGetArray(DyPLocal, &parray);
42110167291SKenneth E. Jansen       PetscRow = 0;
42210167291SKenneth E. Jansen       for ( node = 0; node< nshg; node++) {
42310167291SKenneth E. Jansen         for (var = 1; var<= nflow; var++) {
42410167291SKenneth E. Jansen           index = node + (var-1)*nshg;
42510167291SKenneth E. Jansen           Dy[index] = parray[PetscRow];
42610167291SKenneth E. Jansen           PetscRow = PetscRow+1;
42710167291SKenneth E. Jansen         }
42810167291SKenneth E. Jansen       }
42910167291SKenneth E. Jansen       ierr = VecRestoreArray(DyPLocal, &parray);
43010167291SKenneth E. Jansen 
43110167291SKenneth E. Jansen       firstpetsccall = 0;
43210167291SKenneth E. Jansen       // later timer ('Solver  ');
43310167291SKenneth E. Jansen 
43410167291SKenneth E. Jansen // .... output the statistics
43510167291SKenneth E. Jansen //
43610167291SKenneth E. Jansen       itrpar.iKs=0; // see rstat()
43710167291SKenneth E. Jansen       PetscInt its;
43810167291SKenneth E. Jansen       //ierr = KSPGetIterationNumber(ksp, &itrpar.iKs);
43910167291SKenneth E. Jansen       ierr = KSPGetIterationNumber(ksp, &its);
44010167291SKenneth E. Jansen       itrpar.iKs = (int) its;
44110167291SKenneth E. Jansen       get_time((duration+2),(duration+3));
44210167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
44310167291SKenneth E. Jansen                         (duration+1), (duration+3),
44410167291SKenneth E. Jansen                         "solWork \0"); // char(0))
44510167291SKenneth E. Jansen       itrpar.ntotGM += (int) its;
44610167291SKenneth E. Jansen       rstat (res, ilwork);
44710167291SKenneth E. Jansen //
44810167291SKenneth E. Jansen // .... stop the timer
44910167291SKenneth E. Jansen //
45010167291SKenneth E. Jansen // 3002 continue                  ! no solve just res.
45110167291SKenneth E. Jansen       // later call timer ('Back    ')
45210167291SKenneth E. Jansen //
45310167291SKenneth E. Jansen // .... end
45410167291SKenneth E. Jansen //
45510167291SKenneth E. Jansen }
45610167291SKenneth E. Jansen 
45710167291SKenneth E. Jansen void     SolGMRpSclr(double* y,         double* ac,
45810167291SKenneth E. Jansen      	double* x,      double* elDw,   int* iBC,       double* BC,
45910167291SKenneth E. Jansen      	int* col,       int* row,
46010167291SKenneth E. Jansen      	int* iper,
46110167291SKenneth E. Jansen      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
46210167291SKenneth E. Jansen      	double* shglb,  double* res,   double* Dy,     long long int* fncorp)
46310167291SKenneth E. Jansen {
46410167291SKenneth E. Jansen //
46510167291SKenneth E. Jansen // ----------------------------------------------------------------------
46610167291SKenneth E. Jansen //
46710167291SKenneth E. Jansen //  This is the preconditioned GMRES driver routine.
46810167291SKenneth E. Jansen //
46910167291SKenneth E. Jansen // input:
47010167291SKenneth E. Jansen //  y      (nshg,ndof)           : Y-variables at n+alpha_v
47110167291SKenneth E. Jansen //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
47210167291SKenneth E. Jansen //  yold   (nshg,ndof)           : Y-variables at beginning of step
47310167291SKenneth E. Jansen //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
47410167291SKenneth E. Jansen //  x      (numnp,nsd)            : node coordinates
47510167291SKenneth E. Jansen //  iBC    (nshg)                : BC codes
47610167291SKenneth E. Jansen //  BC     (nshg,ndofBC)         : BC constraint parameters
47710167291SKenneth E. Jansen //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
47810167291SKenneth E. Jansen //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
47910167291SKenneth E. Jansen //
48010167291SKenneth E. Jansen // ----------------------------------------------------------------------
48110167291SKenneth E. Jansen //
48210167291SKenneth E. Jansen 
48310167291SKenneth E. Jansen 
48410167291SKenneth E. Jansen // Get variables from common_c.h
48510167291SKenneth E. Jansen       int nshg, nflow, nsd, iownnodes;
48610167291SKenneth E. Jansen       nshg  = conpar.nshg;
48710167291SKenneth E. Jansen       nsd = NSD;
48810167291SKenneth E. Jansen       iownnodes = conpar.iownnodes;
48910167291SKenneth E. Jansen 
49010167291SKenneth E. Jansen       gcorp_t nshgt;
49110167291SKenneth E. Jansen       gcorp_t mbeg;
49210167291SKenneth E. Jansen       gcorp_t mend;
49310167291SKenneth E. Jansen       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
49410167291SKenneth E. Jansen       mbeg = (gcorp_t) newdim.minowned;
49510167291SKenneth E. Jansen       mend = (gcorp_t) newdim.maxowned;
49610167291SKenneth E. Jansen 
49710167291SKenneth E. Jansen 
49810167291SKenneth E. Jansen       int node, element, var, eqn;
49910167291SKenneth E. Jansen       double valtoinsert;
50010167291SKenneth E. Jansen       int nenl, iel, lelCat, lcsyst, iorder, nshl;
50110167291SKenneth E. Jansen       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
50210167291SKenneth E. Jansen       double DyFlats[nshg];
50310167291SKenneth E. Jansen       double DyFlatPhastas[nshg];
50410167291SKenneth E. Jansen       double rmes[nshg];
50510167291SKenneth E. Jansen // DEBUG
50610167291SKenneth E. Jansen       int i,j,k,l,m;
50710167291SKenneth E. Jansen       PetscInt indexsetarys[nshg];
50810167291SKenneth E. Jansen       PetscInt gblindexsetarys[nshg];
50910167291SKenneth E. Jansen       PetscInt nodetoinsert;
51010167291SKenneth E. Jansen 
51110167291SKenneth E. Jansen       double  real_rtol, real_abstol, real_dtol;
51210167291SKenneth E. Jansen       double *parray;
51310167291SKenneth E. Jansen       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
51410167291SKenneth E. Jansen       PetscInt petsc_n;
51510167291SKenneth E. Jansen       PetscOne = 1;
51610167291SKenneth E. Jansen       uint64_t duration[4];
51710167291SKenneth E. Jansen 
51810167291SKenneth E. Jansen 
51910167291SKenneth E. Jansen //
52010167291SKenneth E. Jansen //
52110167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<******************
52210167291SKenneth E. Jansen //
52310167291SKenneth E. Jansen //
52410167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations
52510167291SKenneth E. Jansen //
52610167291SKenneth E. Jansen //
52710167291SKenneth E. Jansen       genpar.idflx = 0 ;
52810167291SKenneth E. Jansen       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
52910167291SKenneth E. Jansen       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
53010167291SKenneth E. Jansen 
53110167291SKenneth E. Jansen 
53210167291SKenneth E. Jansen 
53310167291SKenneth E. Jansen       PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
53410167291SKenneth E. Jansen       PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
53510167291SKenneth E. Jansen       gcorp_t glbNZ;
53610167291SKenneth E. Jansen 
53710167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
53810167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
53910167291SKenneth E. Jansen              idiagnz[i]=0;
54010167291SKenneth E. Jansen              iodiagnz[i]=0;
54110167291SKenneth E. Jansen         }
54210167291SKenneth E. Jansen         i=0;
54310167291SKenneth E. Jansen         for(k=0;k<nshg;k++) {
54410167291SKenneth E. Jansen           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
54510167291SKenneth E. Jansen // this node is not owned by this rank so we skip
54610167291SKenneth E. Jansen           } else {
54710167291SKenneth E. Jansen            for(j=col[i]-1;j<col[i+1]-1;j++) {
54810167291SKenneth E. Jansen              glbNZ=fncorp[row[j]];
54910167291SKenneth E. Jansen              if((glbNZ < mbeg) || (glbNZ > mend)) {
55010167291SKenneth E. Jansen                 iodiagnz[i]++;
55110167291SKenneth E. Jansen              } else {
55210167291SKenneth E. Jansen                 idiagnz[i]++;
55310167291SKenneth E. Jansen              }
55410167291SKenneth E. Jansen            }
55510167291SKenneth E. Jansen            i++;
55610167291SKenneth E. Jansen           }
55710167291SKenneth E. Jansen         }
55810167291SKenneth E. Jansen         gcorp_t mind=1000;
55910167291SKenneth E. Jansen         gcorp_t mino=1000;
56010167291SKenneth E. Jansen         gcorp_t maxd=0;
56110167291SKenneth E. Jansen         gcorp_t maxo=0;
56210167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
56310167291SKenneth E. Jansen            mind=min(mind,idiagnz[i]);
56410167291SKenneth E. Jansen            mino=min(mino,iodiagnz[i]);
56510167291SKenneth E. Jansen            maxd=max(maxd,idiagnz[i]);
56610167291SKenneth E. Jansen            maxo=max(maxo,iodiagnz[i]);
56710167291SKenneth E. Jansen         }
56810167291SKenneth E. Jansen 
56910167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
57010167291SKenneth E. Jansen            iodiagnz[i]=1.3*maxd;
57110167291SKenneth E. Jansen            idiagnz[i]=1.3*maxd;
57210167291SKenneth E. Jansen         }
57310167291SKenneth E. Jansen 
57410167291SKenneth E. Jansen 
57510167291SKenneth E. Jansen 
57610167291SKenneth E. Jansen        }
57710167291SKenneth E. Jansen        if(firstpetsccalls == 1) {
57810167291SKenneth E. Jansen 
57910167291SKenneth E. Jansen         petsc_m  = (PetscInt) iownnodes;
58010167291SKenneth E. Jansen         petsc_M  = (PetscInt) nshgt;
58110167291SKenneth E. Jansen         petsc_PA  = (PetscInt) 40;
58210167291SKenneth E. Jansen 
58310167291SKenneth E. Jansen         ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M,
58410167291SKenneth E. Jansen                              0, idiagnz, 0, iodiagnz, &lhsPs);
58510167291SKenneth E. Jansen 
58610167291SKenneth E. Jansen         ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
58710167291SKenneth E. Jansen 
58810167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
589dcce770dSKenneth E. Jansen #ifdef JEDBROWN
59010167291SKenneth E. Jansen         ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
591dcce770dSKenneth E. Jansen #endif
59210167291SKenneth E. Jansen         ierr = MatSetUp(lhsPs);
59310167291SKenneth E. Jansen 
59410167291SKenneth E. Jansen       PetscInt myMatStart, myMatEnd;
59510167291SKenneth E. Jansen       ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
59610167291SKenneth E. Jansen       }
59710167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
59810167291SKenneth E. Jansen  //     if(workfc.myrank ==0) printf("Before MatZeroEntries  \n");
59910167291SKenneth E. Jansen       if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs);
60010167291SKenneth E. Jansen 
60110167291SKenneth E. Jansen       get_time(duration, (duration+1));
60210167291SKenneth E. Jansen 
60310167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
60410167291SKenneth E. Jansen  //     if(workfc.myrank ==0) printf("Before elmgmr  \n");
60510167291SKenneth E. Jansen //      for (i=0; i<nshg ; i++) {
60610167291SKenneth E. Jansen //            assert(fncorp[i]>0);
60710167291SKenneth E. Jansen //      }
60810167291SKenneth E. Jansen 
60910167291SKenneth E. Jansen       ElmGMRPETScSclr(y,             ac,            x, elDw,
61010167291SKenneth E. Jansen                   shp,           shgl,          iBC,
61110167291SKenneth E. Jansen                   BC,            shpb,
61210167291SKenneth E. Jansen                   shglb,         res,
61310167291SKenneth E. Jansen                   rmes,                   iper,
61410167291SKenneth E. Jansen                   ilwork,        &lhsPs);
61510167291SKenneth E. Jansen        if(firstpetsccalls == 1) {
61610167291SKenneth E. Jansen       // Setup IndexSet. For now, we mimic vector insertion procedure
61710167291SKenneth E. Jansen       // Since we always reference by global indexes this doesn't matter
61810167291SKenneth E. Jansen       // except for cache performance)
61910167291SKenneth E. Jansen       // TODO: Better arrangment?
62010167291SKenneth E. Jansen         nodetoinsert = 0;
62110167291SKenneth E. Jansen         k=0;
62210167291SKenneth E. Jansen         if(workfc.numpe > 1) {
62310167291SKenneth E. Jansen           for (i=0; i<nshg ; i++) {
62410167291SKenneth E. Jansen             nodetoinsert = fncorp[i]-1;
62510167291SKenneth E. Jansen             indexsetarys[k] = nodetoinsert;
62610167291SKenneth E. Jansen             k = k+1;
62710167291SKenneth E. Jansen           }
62810167291SKenneth E. Jansen         }
62910167291SKenneth E. Jansen         else {
63010167291SKenneth E. Jansen           for (i=0; i<nshg ; i++) {
63110167291SKenneth E. Jansen             nodetoinsert = i;
63210167291SKenneth E. Jansen             indexsetarys[k] = nodetoinsert;
63310167291SKenneth E. Jansen             k = k+1;
63410167291SKenneth E. Jansen           }
63510167291SKenneth E. Jansen         }
63610167291SKenneth E. Jansen 
63710167291SKenneth E. Jansen         for (i=0; i<nshg; i++) {
63810167291SKenneth E. Jansen           gblindexsetarys[i] = i ; //TODO: better option for performance?
63910167291SKenneth E. Jansen         }
64010167291SKenneth E. Jansen //  Create Vector Index Maps
64110167291SKenneth E. Jansen         petsc_n  = (PetscInt) nshg;
64210167291SKenneth E. Jansen         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys,
64310167291SKenneth E. Jansen      	       PETSC_COPY_VALUES, &LocalIndexSets);
64410167291SKenneth E. Jansen         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetarys,
64510167291SKenneth E. Jansen                PETSC_COPY_VALUES, &GlobalIndexSets);
64610167291SKenneth E. Jansen         ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSets,&VectorMappings);
64710167291SKenneth E. Jansen         ierr =  ISLocalToGlobalMappingCreateIS(GlobalIndexSets,&GblVectorMappings);
64810167291SKenneth E. Jansen       }
64910167291SKenneth E. Jansen       if(genpar.lhs ==1) {
65010167291SKenneth E. Jansen       ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY);
65110167291SKenneth E. Jansen       ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY);
65210167291SKenneth E. Jansen       }
65310167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
65410167291SKenneth E. Jansen         ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol);
65510167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs);
65610167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs);
65710167291SKenneth E. Jansen       }
65810167291SKenneth E. Jansen       ierr = VecZeroEntries(resPs);
65910167291SKenneth E. Jansen       ierr = VecZeroEntries(DyPs);
66010167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
66110167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobals);
66210167291SKenneth E. Jansen         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals);
66310167291SKenneth E. Jansen       }
66410167291SKenneth E. Jansen         ierr = VecZeroEntries(resPGlobals);
66510167291SKenneth E. Jansen 
66610167291SKenneth E. Jansen       PetscRow=0;
66710167291SKenneth E. Jansen       k = 0;
66810167291SKenneth E. Jansen       int index;
66910167291SKenneth E. Jansen       for (i=0; i<nshg; i++ ){
67010167291SKenneth E. Jansen           valtoinsert = res[i];
67110167291SKenneth E. Jansen           if(workfc.numpe > 1) {
67210167291SKenneth E. Jansen             PetscRow = (fncorp[i]-1);
67310167291SKenneth E. Jansen           }
67410167291SKenneth E. Jansen           else {
67510167291SKenneth E. Jansen             PetscRow = i-1;
67610167291SKenneth E. Jansen           }
67710167291SKenneth E. Jansen           assert(fncorp[i]<=nshgt);
67810167291SKenneth E. Jansen           assert(fncorp[i]>0);
67910167291SKenneth E. Jansen           assert(PetscRow>=0);
68010167291SKenneth E. Jansen           assert(PetscRow<=nshgt);
68110167291SKenneth E. Jansen           ierr =  VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES);
68210167291SKenneth E. Jansen       }
68310167291SKenneth E. Jansen //      printf("after VecSetValue loop %d\n",ierr);
68410167291SKenneth E. Jansen       ierr = VecAssemblyBegin(resPs);
68510167291SKenneth E. Jansen       ierr = VecAssemblyEnd(resPs);
68610167291SKenneth E. Jansen 
68710167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
68810167291SKenneth E. Jansen         ierr = KSPCreate(PETSC_COMM_WORLD, &ksps);
68910167291SKenneth E. Jansen         ierr = KSPSetOperators(ksps, lhsPs, lhsPs);
69010167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN);
69110167291SKenneth E. Jansen         ierr = KSPGetPC(ksps, &pcs);
69210167291SKenneth E. Jansen         ierr = PCSetType(pcs, PCPBJACOBI);
69310167291SKenneth E. Jansen         PetscInt maxits;
69410167291SKenneth E. Jansen         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
69510167291SKenneth E. Jansen         //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace);
69610167291SKenneth E. Jansen         ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
69710167291SKenneth E. Jansen         ierr = KSPSetFromOptions(ksps);
69810167291SKenneth E. Jansen       }
69910167291SKenneth E. Jansen       ierr = KSPSolve(ksps, resPs, DyPs);
70010167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
70110167291SKenneth E. Jansen       ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, GlobalIndexSets, &scatter7s);
70210167291SKenneth E. Jansen       }
70310167291SKenneth E. Jansen       ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
70410167291SKenneth E. Jansen       ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
70510167291SKenneth E. Jansen       ierr = VecGetArray(DyPLocals, &parray);
70610167291SKenneth E. Jansen       PetscRow = 0;
70710167291SKenneth E. Jansen       for ( node = 0; node< nshg; node++) {
70810167291SKenneth E. Jansen           index = node;
70910167291SKenneth E. Jansen           Dy[index] = parray[PetscRow];
71010167291SKenneth E. Jansen           PetscRow = PetscRow+1;
71110167291SKenneth E. Jansen       }
71210167291SKenneth E. Jansen       ierr = VecRestoreArray(DyPLocals, &parray);
71310167291SKenneth E. Jansen 
71410167291SKenneth E. Jansen       firstpetsccalls = 0;
71510167291SKenneth E. Jansen 
71610167291SKenneth E. Jansen // .... output the statistics
71710167291SKenneth E. Jansen //
71810167291SKenneth E. Jansen //      itrpar.iKss=0; // see rstat()
71910167291SKenneth E. Jansen       PetscInt its;
72010167291SKenneth E. Jansen       ierr = KSPGetIterationNumber(ksps, &its);
72110167291SKenneth E. Jansen       itrpar.iKss = (int) its;
72210167291SKenneth E. Jansen       itrpar.ntotGMs += (int) its;
72310167291SKenneth E. Jansen       rstatSclr (res, ilwork);
72410167291SKenneth E. Jansen //
72510167291SKenneth E. Jansen // .... end
72610167291SKenneth E. Jansen //
72710167291SKenneth E. Jansen }
72817860365SKenneth E. Jansen #endif
729