xref: /phasta/phSolver/compressible/solgmrpetsc.c (revision 1016729149754f57cd03fe576ba6fd0f1723ab31)
1*10167291SKenneth E. Jansen #include <stdio.h>
2*10167291SKenneth E. Jansen 
3*10167291SKenneth E. Jansen #include "petscsys.h"
4*10167291SKenneth E. Jansen #include "petscvec.h"
5*10167291SKenneth E. Jansen #include "petscmat.h"
6*10167291SKenneth E. Jansen #include "petscpc.h"
7*10167291SKenneth E. Jansen #include "petscksp.h"
8*10167291SKenneth E. Jansen 
9*10167291SKenneth E. Jansen #include "assert.h"
10*10167291SKenneth E. Jansen #include "common_c.h"
11*10167291SKenneth E. Jansen 
12*10167291SKenneth E. Jansen #include <FCMangle.h>
13*10167291SKenneth E. Jansen #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP)
14*10167291SKenneth E. Jansen #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR)
15*10167291SKenneth E. Jansen #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC)
16*10167291SKenneth E. Jansen #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR)
17*10167291SKenneth E. Jansen #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT)
18*10167291SKenneth E. Jansen #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR)
19*10167291SKenneth E. Jansen #define min(a,b) (((a) < (b)) ? (a) : (b))
20*10167291SKenneth E. Jansen #define max(a,b) (((a) > (b)) ? (a) : (b))
21*10167291SKenneth E. Jansen #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME)
22*10167291SKenneth E. Jansen #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF)
23*10167291SKenneth E. Jansen 
24*10167291SKenneth E. Jansen typedef long long int gcorp_t;
25*10167291SKenneth E. Jansen 
26*10167291SKenneth E. Jansen void get_time(uint64_t* rv, uint64_t* cycle);
27*10167291SKenneth E. Jansen void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl);
28*10167291SKenneth E. Jansen 
29*10167291SKenneth E. Jansen //#include "auxmpi.h"
30*10167291SKenneth E. Jansen //      static PetscOffset poff;
31*10167291SKenneth E. Jansen 
32*10167291SKenneth E. Jansen       static Mat lhsP;
33*10167291SKenneth E. Jansen       static PC pc;
34*10167291SKenneth E. Jansen       static KSP ksp;
35*10167291SKenneth E. Jansen       static Vec DyP, resP, DyPLocal, resPGlobal;
36*10167291SKenneth E. Jansen       static PetscErrorCode ierr;
37*10167291SKenneth E. Jansen       static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol;
38*10167291SKenneth E. Jansen       static IS LocalIndexSet, GlobalIndexSet;
39*10167291SKenneth E. Jansen       static ISLocalToGlobalMapping VectorMapping;
40*10167291SKenneth E. Jansen       static ISLocalToGlobalMapping GblVectorMapping;
41*10167291SKenneth E. Jansen       static  VecScatter scatter7;
42*10167291SKenneth E. Jansen       static int firstpetsccall = 1;
43*10167291SKenneth E. Jansen 
44*10167291SKenneth E. Jansen       static Mat lhsPs;
45*10167291SKenneth E. Jansen       static PC pcs;
46*10167291SKenneth E. Jansen       static KSP ksps;
47*10167291SKenneth E. Jansen       static Vec DyPs, resPs, DyPLocals, resPGlobals;
48*10167291SKenneth E. Jansen       static IS LocalIndexSets, GlobalIndexSets;
49*10167291SKenneth E. Jansen       static ISLocalToGlobalMapping VectorMappings;
50*10167291SKenneth E. Jansen       static ISLocalToGlobalMapping GblVectorMappings;
51*10167291SKenneth E. Jansen       static VecScatter scatter7s;
52*10167291SKenneth E. Jansen       static int firstpetsccalls = 1;
53*10167291SKenneth E. Jansen 
54*10167291SKenneth E. Jansen void     SolGMRp(double* y,         double* ac,        double* yold,
55*10167291SKenneth E. Jansen      	double* x,         int* iBC,       double* BC,
56*10167291SKenneth E. Jansen      	int* col,       int* row,       double* lhsk,
57*10167291SKenneth E. Jansen      	double* res,       double* BDiag,
58*10167291SKenneth E. Jansen      	int* iper,
59*10167291SKenneth E. Jansen      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
60*10167291SKenneth E. Jansen      	double* shglb,     double* Dy,        double* rerr, long long int* fncorp)
61*10167291SKenneth E. Jansen {
62*10167291SKenneth E. Jansen //
63*10167291SKenneth E. Jansen // ----------------------------------------------------------------------
64*10167291SKenneth E. Jansen //
65*10167291SKenneth E. Jansen //  This is the preconditioned GMRES driver routine.
66*10167291SKenneth E. Jansen //
67*10167291SKenneth E. Jansen // input:
68*10167291SKenneth E. Jansen //  y      (nshg,ndof)           : Y-variables at n+alpha_v
69*10167291SKenneth E. Jansen //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
70*10167291SKenneth E. Jansen //  yold   (nshg,ndof)           : Y-variables at beginning of step
71*10167291SKenneth E. Jansen //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
72*10167291SKenneth E. Jansen //  x      (numnp,nsd)            : node coordinates
73*10167291SKenneth E. Jansen //  iBC    (nshg)                : BC codes
74*10167291SKenneth E. Jansen //  BC     (nshg,ndofBC)         : BC constraint parameters
75*10167291SKenneth E. Jansen //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
76*10167291SKenneth E. Jansen //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
77*10167291SKenneth E. Jansen //
78*10167291SKenneth E. Jansen // output:
79*10167291SKenneth E. Jansen //  res    (nshg,nflow)           : preconditioned residual
80*10167291SKenneth E. Jansen //  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
81*10167291SKenneth E. Jansen //
82*10167291SKenneth E. Jansen //
83*10167291SKenneth E. Jansen // Zdenek Johan,  Winter 1991.  (Fortran 90)
84*10167291SKenneth E. Jansen // ----------------------------------------------------------------------
85*10167291SKenneth E. Jansen //
86*10167291SKenneth E. Jansen 
87*10167291SKenneth E. Jansen 
88*10167291SKenneth E. Jansen // Get variables from common_c.h
89*10167291SKenneth E. Jansen       int nshg, nflow, nsd, iownnodes;
90*10167291SKenneth E. Jansen       nshg  = conpar.nshg;
91*10167291SKenneth E. Jansen       nflow = conpar.nflow;
92*10167291SKenneth E. Jansen       nsd = NSD;
93*10167291SKenneth E. Jansen       iownnodes = conpar.iownnodes;
94*10167291SKenneth E. Jansen 
95*10167291SKenneth E. Jansen       gcorp_t nshgt;
96*10167291SKenneth E. Jansen       gcorp_t mbeg;
97*10167291SKenneth E. Jansen       gcorp_t mend;
98*10167291SKenneth E. Jansen       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
99*10167291SKenneth E. Jansen       mbeg = (gcorp_t) newdim.minowned;
100*10167291SKenneth E. Jansen       mend = (gcorp_t) newdim.maxowned;
101*10167291SKenneth E. Jansen 
102*10167291SKenneth E. Jansen 
103*10167291SKenneth E. Jansen       int node, element, var, eqn;
104*10167291SKenneth E. Jansen       double valtoinsert;
105*10167291SKenneth E. Jansen       int nenl, iel, lelCat, lcsyst, iorder, nshl;
106*10167291SKenneth E. Jansen       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
107*10167291SKenneth E. Jansen       double DyFlat[nshg*nflow];
108*10167291SKenneth E. Jansen       double DyFlatPhasta[nshg*nflow];
109*10167291SKenneth E. Jansen       double rmes[nshg*nflow];
110*10167291SKenneth E. Jansen // DEBUG
111*10167291SKenneth E. Jansen       int i,j,k,l,m;
112*10167291SKenneth E. Jansen       //int indexsetary[nflow*nshg];
113*10167291SKenneth E. Jansen       //int gblindexsetary[nflow*nshg];
114*10167291SKenneth E. Jansen       PetscInt indexsetary[nflow*nshg];
115*10167291SKenneth E. Jansen       PetscInt gblindexsetary[nflow*nshg];
116*10167291SKenneth E. Jansen       PetscInt nodetoinsert;
117*10167291SKenneth E. Jansen 
118*10167291SKenneth E. Jansen       // FIXME: PetscScalar
119*10167291SKenneth E. Jansen       double  real_rtol, real_abstol, real_dtol;
120*10167291SKenneth E. Jansen // /DEBUG
121*10167291SKenneth E. Jansen       //double parray[1]; // Should be a PetscScalar
122*10167291SKenneth E. Jansen       double *parray; // Should be a PetscScalar
123*10167291SKenneth E. Jansen       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
124*10167291SKenneth E. Jansen       PetscInt petsc_n;
125*10167291SKenneth E. Jansen       PetscOne = 1;
126*10167291SKenneth E. Jansen       uint64_t duration[4];
127*10167291SKenneth E. Jansen 
128*10167291SKenneth E. Jansen //      printf("%g %g %g \n", *rerr, *(rerr+1) , *(rerr+2));
129*10167291SKenneth E. Jansen 
130*10167291SKenneth E. Jansen        if(firstpetsccall == 1) {
131*10167291SKenneth E. Jansen /*         call get_time(duration(1),duration(2));
132*10167291SKenneth E. Jansen          call system('sleep 10');
133*10167291SKenneth E. Jansen          call get_time(duration(3),duration(4));
134*10167291SKenneth E. Jansen          call get_max_time_diff(duration(1), duration(3),
135*10167291SKenneth E. Jansen                               duration(2), duration(4),
136*10167291SKenneth E. Jansen                               "TenSecondSleep " // char(0))
137*10167291SKenneth E. Jansen          if(myrank .eq. 0) {
138*10167291SKenneth E. Jansen            !write(*,"(A, E10.3)") "TenSecondSleep ", elapsed
139*10167291SKenneth E. Jansen          }
140*10167291SKenneth E. Jansen */
141*10167291SKenneth E. Jansen        }
142*10167291SKenneth E. Jansen //
143*10167291SKenneth E. Jansen //
144*10167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<******************
145*10167291SKenneth E. Jansen //
146*10167291SKenneth E. Jansen //
147*10167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations
148*10167291SKenneth E. Jansen //
149*10167291SKenneth E. Jansen //
150*10167291SKenneth E. Jansen       genpar.idflx = 0 ;
151*10167291SKenneth E. Jansen       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
152*10167291SKenneth E. Jansen       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
153*10167291SKenneth E. Jansen 
154*10167291SKenneth E. Jansen 
155*10167291SKenneth E. Jansen 
156*10167291SKenneth E. Jansen       PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
157*10167291SKenneth E. Jansen       PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
158*10167291SKenneth E. Jansen       gcorp_t glbNZ;
159*10167291SKenneth E. Jansen 
160*10167291SKenneth E. Jansen       if(firstpetsccall == 1) {
161*10167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
162*10167291SKenneth E. Jansen              idiagnz[i]=0;
163*10167291SKenneth E. Jansen              iodiagnz[i]=0;
164*10167291SKenneth E. Jansen         }
165*10167291SKenneth E. Jansen         i=0;
166*10167291SKenneth E. Jansen         for(k=0;k<nshg;k++) {
167*10167291SKenneth E. Jansen           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
168*10167291SKenneth E. Jansen // this node is not owned by this rank so we skip
169*10167291SKenneth E. Jansen           } else {
170*10167291SKenneth E. Jansen            for(j=col[i]-1;j<col[i+1]-1;j++) {
171*10167291SKenneth E. Jansen              glbNZ=fncorp[row[j]];
172*10167291SKenneth E. Jansen              if((glbNZ < mbeg) || (glbNZ > mend)) {
173*10167291SKenneth E. Jansen                 iodiagnz[i]++;
174*10167291SKenneth E. Jansen              } else {
175*10167291SKenneth E. Jansen                 idiagnz[i]++;
176*10167291SKenneth E. Jansen              }
177*10167291SKenneth E. Jansen            }
178*10167291SKenneth E. Jansen            i++;
179*10167291SKenneth E. Jansen           }
180*10167291SKenneth E. Jansen         }
181*10167291SKenneth E. Jansen         gcorp_t mind=1000;
182*10167291SKenneth E. Jansen         gcorp_t mino=1000;
183*10167291SKenneth E. Jansen         gcorp_t maxd=0;
184*10167291SKenneth E. Jansen         gcorp_t maxo=0;
185*10167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
186*10167291SKenneth E. Jansen            mind=min(mind,idiagnz[i]);
187*10167291SKenneth E. Jansen            mino=min(mino,iodiagnz[i]);
188*10167291SKenneth E. Jansen            maxd=max(maxd,idiagnz[i]);
189*10167291SKenneth E. Jansen            maxo=max(maxo,iodiagnz[i]);
190*10167291SKenneth E. Jansen //           iodiagnz[i]=max(iodiagnz[i],10);
191*10167291SKenneth E. Jansen //           idiagnz[i]=max(idiagnz[i],10);
192*10167291SKenneth E. Jansen //           iodiagnz[i]=2*iodiagnz[i];   //  estimate a bit higher for off-part interactions
193*10167291SKenneth E. Jansen //           idiagnz[i]=2*idiagnz[i];   //  estimate a bit higher for off-part interactions
194*10167291SKenneth E. Jansen         }
195*10167291SKenneth E. Jansen // the above was pretty good but below is faster and not too much more memory...of course once you do this
196*10167291SKenneth E. Jansen // could just use the constant fill parameters in create but keep it alive for potential later optimization
197*10167291SKenneth E. Jansen 
198*10167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
199*10167291SKenneth E. Jansen            iodiagnz[i]=1.3*maxd;
200*10167291SKenneth E. Jansen            idiagnz[i]=1.3*maxd;
201*10167291SKenneth E. Jansen         }
202*10167291SKenneth E. Jansen 
203*10167291SKenneth E. Jansen 
204*10167291SKenneth E. Jansen 
205*10167291SKenneth E. Jansen         if(workfc.numpe < 200){
206*10167291SKenneth E. Jansen           printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
207*10167291SKenneth E. Jansen           printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo);
208*10167291SKenneth E. Jansen         }
209*10167291SKenneth E. Jansen         // Print debug info
210*10167291SKenneth E. Jansen         if(nshgt < 200){
211*10167291SKenneth E. Jansen         int irank;
212*10167291SKenneth E. Jansen         for(irank=0;irank<workfc.numpe;irank++) {
213*10167291SKenneth E. Jansen           if(irank == workfc.myrank){
214*10167291SKenneth E. Jansen             printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank);
215*10167291SKenneth E. Jansen             for(i=0;i<iownnodes;i++) {
216*10167291SKenneth E. Jansen               printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]);
217*10167291SKenneth E. Jansen             }
218*10167291SKenneth E. Jansen           }
219*10167291SKenneth E. Jansen          MPI_Barrier(MPI_COMM_WORLD);
220*10167291SKenneth E. Jansen         }
221*10167291SKenneth E. Jansen         }
222*10167291SKenneth E. Jansen        }
223*10167291SKenneth E. Jansen        if(firstpetsccall == 1) {
224*10167291SKenneth E. Jansen 
225*10167291SKenneth E. Jansen         petsc_bs = (PetscInt) nflow;
226*10167291SKenneth E. Jansen         petsc_m  = (PetscInt) nflow* (PetscInt) iownnodes;
227*10167291SKenneth E. Jansen         petsc_M  = (PetscInt) nshgt * (PetscInt) nflow;
228*10167291SKenneth E. Jansen         petsc_PA  = (PetscInt) 40;
229*10167291SKenneth E. Jansen         // TODO: fixe nshgt for 64 bit integer!!!!!
230*10167291SKenneth E. Jansen 
231*10167291SKenneth E. Jansen         //ierr = MatCreateBAIJ(PETSC_COMM_WORLD, nflow, nflow*iownnodes,
232*10167291SKenneth E. Jansen         //       nflow*iownnodes, nshgt*nflow, nshgt*nflow, 0,
233*10167291SKenneth E. Jansen 
234*10167291SKenneth E. Jansen // this has no pre-allocate        ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M,
235*10167291SKenneth E. Jansen // this has no pre-allocate                             0, NULL, 0, NULL, &lhsP);
236*10167291SKenneth E. Jansen 
237*10167291SKenneth E. Jansen // next 2 do pre-allocate
238*10167291SKenneth E. Jansen 
239*10167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
240*10167291SKenneth E. Jansen //      if(workfc.myrank ==0) printf("Before matCreateBAIJ petsc_bs,petsc_m,petsc_M,petsc_PA %ld %ld %ld %ld \n",petsc_bs,petsc_m,petsc_M,petsc_PA);
241*10167291SKenneth E. Jansen 
242*10167291SKenneth E. Jansen         ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M,
243*10167291SKenneth E. Jansen                              0, idiagnz, 0, iodiagnz, &lhsP);
244*10167291SKenneth E. Jansen 
245*10167291SKenneth E. Jansen //                              petsc_PA, NULL, petsc_PA, NULL, &lhsP);
246*10167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
247*10167291SKenneth E. Jansen //      if(workfc.myrank ==0) printf("Before matSetOoption 1  \n");
248*10167291SKenneth E. Jansen         ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
249*10167291SKenneth E. Jansen 
250*10167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
251*10167291SKenneth E. Jansen         ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
252*10167291SKenneth E. Jansen         ierr = MatSetUp(lhsP);
253*10167291SKenneth E. Jansen 
254*10167291SKenneth E. Jansen       PetscInt myMatStart, myMatEnd;
255*10167291SKenneth E. Jansen       ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
256*10167291SKenneth E. Jansen       }
257*10167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
258*10167291SKenneth E. Jansen  //     if(workfc.myrank ==0) printf("Before MatZeroEntries  \n");
259*10167291SKenneth E. Jansen       if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP);
260*10167291SKenneth E. Jansen 
261*10167291SKenneth E. Jansen       get_time(duration, (duration+1));
262*10167291SKenneth E. Jansen 
263*10167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
264*10167291SKenneth E. Jansen  //     if(workfc.myrank ==0) printf("Before elmgmr  \n");
265*10167291SKenneth E. Jansen //      for (i=0; i<nshg ; i++) {
266*10167291SKenneth E. Jansen //            assert(fncorp[i]>0);
267*10167291SKenneth E. Jansen //      }
268*10167291SKenneth E. Jansen 
269*10167291SKenneth E. Jansen       ElmGMRPETSc(y,             ac,            x,
270*10167291SKenneth E. Jansen                   shp,           shgl,          iBC,
271*10167291SKenneth E. Jansen                   BC,            shpb,
272*10167291SKenneth E. Jansen                   shglb,         res,
273*10167291SKenneth E. Jansen                   rmes,                   iper,
274*10167291SKenneth E. Jansen                   ilwork,        rerr ,         &lhsP);
275*10167291SKenneth E. Jansen       get_time((duration+2), (duration+3));
276*10167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
277*10167291SKenneth E. Jansen                         (duration+1), (duration+3),
278*10167291SKenneth E. Jansen                         "ElmGMRPETSc \0");  // char(0))
279*10167291SKenneth E. Jansen //       printf("after ElmGMRPETSc\n");
280*10167291SKenneth E. Jansen        if(firstpetsccall == 1) {
281*10167291SKenneth E. Jansen       // Setup IndexSet. For now, we mimic vector insertion procedure
282*10167291SKenneth E. Jansen       // Since we always reference by global indexes this doesn't matter
283*10167291SKenneth E. Jansen       // except for cache performance)
284*10167291SKenneth E. Jansen       // TODO: Better arrangment?
285*10167291SKenneth E. Jansen         nodetoinsert = 0;
286*10167291SKenneth E. Jansen         k=0;
287*10167291SKenneth E. Jansen         if(workfc.numpe > 1) {
288*10167291SKenneth E. Jansen           for (i=0; i<nshg ; i++) {
289*10167291SKenneth E. Jansen             nodetoinsert = fncorp[i]-1;
290*10167291SKenneth E. Jansen //            if(nodetoinsert<0) printf("nodetoinsert <0! myrank: %d, value: %ld\n", workfc.myrank, nodetoinsert);
291*10167291SKenneth E. Jansen //            assert(fncorp[i]>=0);
292*10167291SKenneth E. Jansen             for (j=1; j<=nflow; j++) {
293*10167291SKenneth E. Jansen               indexsetary[k] = nodetoinsert*nflow+(j-1);
294*10167291SKenneth E. Jansen               assert(indexsetary[k]>=0);
295*10167291SKenneth E. Jansen //              assert(fncorp[i]>=0);
296*10167291SKenneth E. Jansen               k = k+1;
297*10167291SKenneth E. Jansen             }
298*10167291SKenneth E. Jansen           }
299*10167291SKenneth E. Jansen         }
300*10167291SKenneth E. Jansen         else {
301*10167291SKenneth E. Jansen           for (i=0; i<nshg ; i++) {
302*10167291SKenneth E. Jansen             nodetoinsert = i;
303*10167291SKenneth E. Jansen             for (j=1; j<=nflow; j++) {
304*10167291SKenneth E. Jansen               indexsetary[k] = nodetoinsert*nflow+(j-1);
305*10167291SKenneth E. Jansen               k = k+1;
306*10167291SKenneth E. Jansen             }
307*10167291SKenneth E. Jansen           }
308*10167291SKenneth E. Jansen         }
309*10167291SKenneth E. Jansen 
310*10167291SKenneth E. Jansen         for (i=0; i<nshg*nflow; i++) {
311*10167291SKenneth E. Jansen           gblindexsetary[i] = i ; //TODO: better option for performance?
312*10167291SKenneth E. Jansen         }
313*10167291SKenneth E. Jansen //  Create Vector Index Maps
314*10167291SKenneth E. Jansen         petsc_n  = (PetscInt) nshg * (PetscInt) nflow;
315*10167291SKenneth E. Jansen         //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, indexsetary,
316*10167291SKenneth E. Jansen      	//       PETSC_COPY_VALUES, &LocalIndexSet);
317*10167291SKenneth E. Jansen         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary,
318*10167291SKenneth E. Jansen      	       PETSC_COPY_VALUES, &LocalIndexSet);
319*10167291SKenneth E. Jansen //        printf("after 1st ISCreateGeneral %d\n", ierr);
320*10167291SKenneth E. Jansen         //ierr = ISCreateGeneral(PETSC_COMM_SELF, nshg*nflow, gblindexsetary,
321*10167291SKenneth E. Jansen         //       PETSC_COPY_VALUES, &GlobalIndexSet);
322*10167291SKenneth E. Jansen         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetary,
323*10167291SKenneth E. Jansen                PETSC_COPY_VALUES, &GlobalIndexSet);
324*10167291SKenneth E. Jansen //        printf("after 2nd ISCreateGeneral %d\n", ierr);
325*10167291SKenneth E. Jansen         ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSet,&VectorMapping);
326*10167291SKenneth E. Jansen //        printf("after 1st ISLocalToGlobalMappingCreateIS %d\n",ierr);
327*10167291SKenneth E. Jansen         ierr =  ISLocalToGlobalMappingCreateIS(GlobalIndexSet,&GblVectorMapping);
328*10167291SKenneth E. Jansen //        printf("after 2nd ISLocalToGlobalMappingCreateIS %d\n",ierr);
329*10167291SKenneth E. Jansen       }
330*10167291SKenneth E. Jansen       if(genpar.lhs == 1) {
331*10167291SKenneth E. Jansen       get_time((duration), (duration+1));
332*10167291SKenneth E. Jansen       ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY);
333*10167291SKenneth E. Jansen       ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY);
334*10167291SKenneth E. Jansen       get_time((duration+2),(duration+3));
335*10167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
336*10167291SKenneth E. Jansen                         (duration+1), (duration+3),
337*10167291SKenneth E. Jansen                         "MatAssembly \0"); // char(0))
338*10167291SKenneth E. Jansen       get_time(duration, (duration+1));
339*10167291SKenneth E. Jansen       }
340*10167291SKenneth E. Jansen       if(firstpetsccall == 1) {
341*10167291SKenneth E. Jansen         ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol);
342*10167291SKenneth E. Jansen //  TODO Should this be LocalRow? LocalCol? or does LocalRow always == LocalCol
343*10167291SKenneth E. Jansen         //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resP);
344*10167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP);
345*10167291SKenneth E. Jansen         //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &DyP);
346*10167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP);
347*10167291SKenneth E. Jansen       }
348*10167291SKenneth E. Jansen       ierr = VecZeroEntries(resP);
349*10167291SKenneth E. Jansen       ierr = VecZeroEntries(DyP);
350*10167291SKenneth E. Jansen       if(firstpetsccall == 1) {
351*10167291SKenneth E. Jansen         //ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, nshgt*nflow, &resPGlobal);
352*10167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobal);
353*10167291SKenneth E. Jansen         //ierr = VecCreateSeq(PETSC_COMM_SELF, nshg*nflow, &DyPLocal);
354*10167291SKenneth E. Jansen         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal);
355*10167291SKenneth E. Jansen       }
356*10167291SKenneth E. Jansen         ierr = VecZeroEntries(resPGlobal);
357*10167291SKenneth E. Jansen //         call VecZeroEntries(DyPLocal, ierr)
358*10167291SKenneth E. Jansen 
359*10167291SKenneth E. Jansen       PetscRow=0;
360*10167291SKenneth E. Jansen       k = 0;
361*10167291SKenneth E. Jansen       int index;
362*10167291SKenneth E. Jansen       for (i=0; i<nshg; i++ ){
363*10167291SKenneth E. Jansen         for (j = 1; j<=nflow; j++){
364*10167291SKenneth E. Jansen           index = i + (j-1)*nshg;
365*10167291SKenneth E. Jansen           valtoinsert = res[index];
366*10167291SKenneth E. Jansen           if(workfc.numpe > 1) {
367*10167291SKenneth E. Jansen             PetscRow = (fncorp[i]-1)*nflow+(j-1);
368*10167291SKenneth E. Jansen           }
369*10167291SKenneth E. Jansen           else {
370*10167291SKenneth E. Jansen             PetscRow = i*nflow+(j-1);
371*10167291SKenneth E. Jansen           }
372*10167291SKenneth E. Jansen           assert(fncorp[i]<=nshgt);
373*10167291SKenneth E. Jansen           assert(fncorp[i]>0);
374*10167291SKenneth E. Jansen           assert(PetscRow>=0);
375*10167291SKenneth E. Jansen           assert(PetscRow<=nshgt*nflow);
376*10167291SKenneth E. Jansen           ierr =  VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES);
377*10167291SKenneth E. Jansen         }
378*10167291SKenneth E. Jansen       }
379*10167291SKenneth E. Jansen //      printf("after VecSetValue loop %d\n",ierr);
380*10167291SKenneth E. Jansen       ierr = VecAssemblyBegin(resP);
381*10167291SKenneth E. Jansen       ierr = VecAssemblyEnd(resP);
382*10167291SKenneth E. Jansen       get_time((duration+2), (duration+3));
383*10167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
384*10167291SKenneth E. Jansen                         (duration+1), (duration+3),
385*10167291SKenneth E. Jansen                         "VectorWorkPre \0"); // char(0))
386*10167291SKenneth E. Jansen 
387*10167291SKenneth E. Jansen       get_time((duration),(duration+1));
388*10167291SKenneth E. Jansen 
389*10167291SKenneth E. Jansen       if(firstpetsccall == 1) {
390*10167291SKenneth E. Jansen         ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
391*10167291SKenneth E. Jansen         ierr = KSPSetOperators(ksp, lhsP, lhsP);
392*10167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksp, lhsP, lhsP, SAME_NONZERO_PATTERN);
393*10167291SKenneth E. Jansen         ierr = KSPGetPC(ksp, &pc);
394*10167291SKenneth E. Jansen         ierr = PCSetType(pc, PCPBJACOBI);
395*10167291SKenneth E. Jansen         PetscInt maxits;
396*10167291SKenneth E. Jansen         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
397*10167291SKenneth E. Jansen         //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace);
398*10167291SKenneth E. Jansen         ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
399*10167291SKenneth E. Jansen         ierr = KSPSetFromOptions(ksp);
400*10167291SKenneth E. Jansen       }
401*10167291SKenneth E. Jansen       ierr = KSPSolve(ksp, resP, DyP);
402*10167291SKenneth E. Jansen       get_time((duration+2),(duration+3));
403*10167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
404*10167291SKenneth E. Jansen                         (duration+1), (duration+3),
405*10167291SKenneth E. Jansen                         "KSPSolve \0"); // char(0))
406*10167291SKenneth E. Jansen      //if(myrank .eq. 0) then
407*10167291SKenneth E. Jansen      //!write(*,"(A, E10.3)") "KSPSolve ", elapsed
408*10167291SKenneth E. Jansen      //end if
409*10167291SKenneth E. Jansen       get_time((duration),(duration+1));
410*10167291SKenneth E. Jansen       if(firstpetsccall == 1) {
411*10167291SKenneth E. Jansen       ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, GlobalIndexSet, &scatter7);
412*10167291SKenneth E. Jansen       }
413*10167291SKenneth E. Jansen       ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
414*10167291SKenneth E. Jansen       ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
415*10167291SKenneth E. Jansen       ierr = VecGetArray(DyPLocal, &parray);
416*10167291SKenneth E. Jansen       PetscRow = 0;
417*10167291SKenneth E. Jansen       for ( node = 0; node< nshg; node++) {
418*10167291SKenneth E. Jansen         for (var = 1; var<= nflow; var++) {
419*10167291SKenneth E. Jansen           index = node + (var-1)*nshg;
420*10167291SKenneth E. Jansen           Dy[index] = parray[PetscRow];
421*10167291SKenneth E. Jansen           PetscRow = PetscRow+1;
422*10167291SKenneth E. Jansen         }
423*10167291SKenneth E. Jansen       }
424*10167291SKenneth E. Jansen       ierr = VecRestoreArray(DyPLocal, &parray);
425*10167291SKenneth E. Jansen 
426*10167291SKenneth E. Jansen       firstpetsccall = 0;
427*10167291SKenneth E. Jansen       // later timer ('Solver  ');
428*10167291SKenneth E. Jansen 
429*10167291SKenneth E. Jansen // .... output the statistics
430*10167291SKenneth E. Jansen //
431*10167291SKenneth E. Jansen       itrpar.iKs=0; // see rstat()
432*10167291SKenneth E. Jansen       PetscInt its;
433*10167291SKenneth E. Jansen       //ierr = KSPGetIterationNumber(ksp, &itrpar.iKs);
434*10167291SKenneth E. Jansen       ierr = KSPGetIterationNumber(ksp, &its);
435*10167291SKenneth E. Jansen       itrpar.iKs = (int) its;
436*10167291SKenneth E. Jansen       get_time((duration+2),(duration+3));
437*10167291SKenneth E. Jansen       get_max_time_diff((duration), (duration+2),
438*10167291SKenneth E. Jansen                         (duration+1), (duration+3),
439*10167291SKenneth E. Jansen                         "solWork \0"); // char(0))
440*10167291SKenneth E. Jansen       itrpar.ntotGM += (int) its;
441*10167291SKenneth E. Jansen       rstat (res, ilwork);
442*10167291SKenneth E. Jansen //
443*10167291SKenneth E. Jansen // .... stop the timer
444*10167291SKenneth E. Jansen //
445*10167291SKenneth E. Jansen // 3002 continue                  ! no solve just res.
446*10167291SKenneth E. Jansen       // later call timer ('Back    ')
447*10167291SKenneth E. Jansen //
448*10167291SKenneth E. Jansen // .... end
449*10167291SKenneth E. Jansen //
450*10167291SKenneth E. Jansen }
451*10167291SKenneth E. Jansen 
452*10167291SKenneth E. Jansen void     SolGMRpSclr(double* y,         double* ac,
453*10167291SKenneth E. Jansen      	double* x,      double* elDw,   int* iBC,       double* BC,
454*10167291SKenneth E. Jansen      	int* col,       int* row,
455*10167291SKenneth E. Jansen      	int* iper,
456*10167291SKenneth E. Jansen      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
457*10167291SKenneth E. Jansen      	double* shglb,  double* res,   double* Dy,     long long int* fncorp)
458*10167291SKenneth E. Jansen {
459*10167291SKenneth E. Jansen //
460*10167291SKenneth E. Jansen // ----------------------------------------------------------------------
461*10167291SKenneth E. Jansen //
462*10167291SKenneth E. Jansen //  This is the preconditioned GMRES driver routine.
463*10167291SKenneth E. Jansen //
464*10167291SKenneth E. Jansen // input:
465*10167291SKenneth E. Jansen //  y      (nshg,ndof)           : Y-variables at n+alpha_v
466*10167291SKenneth E. Jansen //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
467*10167291SKenneth E. Jansen //  yold   (nshg,ndof)           : Y-variables at beginning of step
468*10167291SKenneth E. Jansen //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
469*10167291SKenneth E. Jansen //  x      (numnp,nsd)            : node coordinates
470*10167291SKenneth E. Jansen //  iBC    (nshg)                : BC codes
471*10167291SKenneth E. Jansen //  BC     (nshg,ndofBC)         : BC constraint parameters
472*10167291SKenneth E. Jansen //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
473*10167291SKenneth E. Jansen //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
474*10167291SKenneth E. Jansen //
475*10167291SKenneth E. Jansen // ----------------------------------------------------------------------
476*10167291SKenneth E. Jansen //
477*10167291SKenneth E. Jansen 
478*10167291SKenneth E. Jansen 
479*10167291SKenneth E. Jansen // Get variables from common_c.h
480*10167291SKenneth E. Jansen       int nshg, nflow, nsd, iownnodes;
481*10167291SKenneth E. Jansen       nshg  = conpar.nshg;
482*10167291SKenneth E. Jansen       nsd = NSD;
483*10167291SKenneth E. Jansen       iownnodes = conpar.iownnodes;
484*10167291SKenneth E. Jansen 
485*10167291SKenneth E. Jansen       gcorp_t nshgt;
486*10167291SKenneth E. Jansen       gcorp_t mbeg;
487*10167291SKenneth E. Jansen       gcorp_t mend;
488*10167291SKenneth E. Jansen       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
489*10167291SKenneth E. Jansen       mbeg = (gcorp_t) newdim.minowned;
490*10167291SKenneth E. Jansen       mend = (gcorp_t) newdim.maxowned;
491*10167291SKenneth E. Jansen 
492*10167291SKenneth E. Jansen 
493*10167291SKenneth E. Jansen       int node, element, var, eqn;
494*10167291SKenneth E. Jansen       double valtoinsert;
495*10167291SKenneth E. Jansen       int nenl, iel, lelCat, lcsyst, iorder, nshl;
496*10167291SKenneth E. Jansen       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
497*10167291SKenneth E. Jansen       double DyFlats[nshg];
498*10167291SKenneth E. Jansen       double DyFlatPhastas[nshg];
499*10167291SKenneth E. Jansen       double rmes[nshg];
500*10167291SKenneth E. Jansen // DEBUG
501*10167291SKenneth E. Jansen       int i,j,k,l,m;
502*10167291SKenneth E. Jansen       PetscInt indexsetarys[nshg];
503*10167291SKenneth E. Jansen       PetscInt gblindexsetarys[nshg];
504*10167291SKenneth E. Jansen       PetscInt nodetoinsert;
505*10167291SKenneth E. Jansen 
506*10167291SKenneth E. Jansen       double  real_rtol, real_abstol, real_dtol;
507*10167291SKenneth E. Jansen       double *parray;
508*10167291SKenneth E. Jansen       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
509*10167291SKenneth E. Jansen       PetscInt petsc_n;
510*10167291SKenneth E. Jansen       PetscOne = 1;
511*10167291SKenneth E. Jansen       uint64_t duration[4];
512*10167291SKenneth E. Jansen 
513*10167291SKenneth E. Jansen 
514*10167291SKenneth E. Jansen //
515*10167291SKenneth E. Jansen //
516*10167291SKenneth E. Jansen // .... *******************>> Element Data Formation <<******************
517*10167291SKenneth E. Jansen //
518*10167291SKenneth E. Jansen //
519*10167291SKenneth E. Jansen // .... set the parameters for flux and surface tension calculations
520*10167291SKenneth E. Jansen //
521*10167291SKenneth E. Jansen //
522*10167291SKenneth E. Jansen       genpar.idflx = 0 ;
523*10167291SKenneth E. Jansen       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
524*10167291SKenneth E. Jansen       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
525*10167291SKenneth E. Jansen 
526*10167291SKenneth E. Jansen 
527*10167291SKenneth E. Jansen 
528*10167291SKenneth E. Jansen       PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
529*10167291SKenneth E. Jansen       PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
530*10167291SKenneth E. Jansen       gcorp_t glbNZ;
531*10167291SKenneth E. Jansen 
532*10167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
533*10167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
534*10167291SKenneth E. Jansen              idiagnz[i]=0;
535*10167291SKenneth E. Jansen              iodiagnz[i]=0;
536*10167291SKenneth E. Jansen         }
537*10167291SKenneth E. Jansen         i=0;
538*10167291SKenneth E. Jansen         for(k=0;k<nshg;k++) {
539*10167291SKenneth E. Jansen           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
540*10167291SKenneth E. Jansen // this node is not owned by this rank so we skip
541*10167291SKenneth E. Jansen           } else {
542*10167291SKenneth E. Jansen            for(j=col[i]-1;j<col[i+1]-1;j++) {
543*10167291SKenneth E. Jansen              glbNZ=fncorp[row[j]];
544*10167291SKenneth E. Jansen              if((glbNZ < mbeg) || (glbNZ > mend)) {
545*10167291SKenneth E. Jansen                 iodiagnz[i]++;
546*10167291SKenneth E. Jansen              } else {
547*10167291SKenneth E. Jansen                 idiagnz[i]++;
548*10167291SKenneth E. Jansen              }
549*10167291SKenneth E. Jansen            }
550*10167291SKenneth E. Jansen            i++;
551*10167291SKenneth E. Jansen           }
552*10167291SKenneth E. Jansen         }
553*10167291SKenneth E. Jansen         gcorp_t mind=1000;
554*10167291SKenneth E. Jansen         gcorp_t mino=1000;
555*10167291SKenneth E. Jansen         gcorp_t maxd=0;
556*10167291SKenneth E. Jansen         gcorp_t maxo=0;
557*10167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
558*10167291SKenneth E. Jansen            mind=min(mind,idiagnz[i]);
559*10167291SKenneth E. Jansen            mino=min(mino,iodiagnz[i]);
560*10167291SKenneth E. Jansen            maxd=max(maxd,idiagnz[i]);
561*10167291SKenneth E. Jansen            maxo=max(maxo,iodiagnz[i]);
562*10167291SKenneth E. Jansen         }
563*10167291SKenneth E. Jansen 
564*10167291SKenneth E. Jansen         for(i=0;i<iownnodes;i++) {
565*10167291SKenneth E. Jansen            iodiagnz[i]=1.3*maxd;
566*10167291SKenneth E. Jansen            idiagnz[i]=1.3*maxd;
567*10167291SKenneth E. Jansen         }
568*10167291SKenneth E. Jansen 
569*10167291SKenneth E. Jansen 
570*10167291SKenneth E. Jansen 
571*10167291SKenneth E. Jansen        }
572*10167291SKenneth E. Jansen        if(firstpetsccalls == 1) {
573*10167291SKenneth E. Jansen 
574*10167291SKenneth E. Jansen         petsc_m  = (PetscInt) iownnodes;
575*10167291SKenneth E. Jansen         petsc_M  = (PetscInt) nshgt;
576*10167291SKenneth E. Jansen         petsc_PA  = (PetscInt) 40;
577*10167291SKenneth E. Jansen 
578*10167291SKenneth E. Jansen         ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M,
579*10167291SKenneth E. Jansen                              0, idiagnz, 0, iodiagnz, &lhsPs);
580*10167291SKenneth E. Jansen 
581*10167291SKenneth E. Jansen         ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
582*10167291SKenneth E. Jansen 
583*10167291SKenneth E. Jansen // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
584*10167291SKenneth E. Jansen         ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
585*10167291SKenneth E. Jansen         ierr = MatSetUp(lhsPs);
586*10167291SKenneth E. Jansen 
587*10167291SKenneth E. Jansen       PetscInt myMatStart, myMatEnd;
588*10167291SKenneth E. Jansen       ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
589*10167291SKenneth E. Jansen       }
590*10167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
591*10167291SKenneth E. Jansen  //     if(workfc.myrank ==0) printf("Before MatZeroEntries  \n");
592*10167291SKenneth E. Jansen       if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs);
593*10167291SKenneth E. Jansen 
594*10167291SKenneth E. Jansen       get_time(duration, (duration+1));
595*10167291SKenneth E. Jansen 
596*10167291SKenneth E. Jansen //      MPI_Barrier(MPI_COMM_WORLD);
597*10167291SKenneth E. Jansen  //     if(workfc.myrank ==0) printf("Before elmgmr  \n");
598*10167291SKenneth E. Jansen //      for (i=0; i<nshg ; i++) {
599*10167291SKenneth E. Jansen //            assert(fncorp[i]>0);
600*10167291SKenneth E. Jansen //      }
601*10167291SKenneth E. Jansen 
602*10167291SKenneth E. Jansen       ElmGMRPETScSclr(y,             ac,            x, elDw,
603*10167291SKenneth E. Jansen                   shp,           shgl,          iBC,
604*10167291SKenneth E. Jansen                   BC,            shpb,
605*10167291SKenneth E. Jansen                   shglb,         res,
606*10167291SKenneth E. Jansen                   rmes,                   iper,
607*10167291SKenneth E. Jansen                   ilwork,        &lhsPs);
608*10167291SKenneth E. Jansen        if(firstpetsccalls == 1) {
609*10167291SKenneth E. Jansen       // Setup IndexSet. For now, we mimic vector insertion procedure
610*10167291SKenneth E. Jansen       // Since we always reference by global indexes this doesn't matter
611*10167291SKenneth E. Jansen       // except for cache performance)
612*10167291SKenneth E. Jansen       // TODO: Better arrangment?
613*10167291SKenneth E. Jansen         nodetoinsert = 0;
614*10167291SKenneth E. Jansen         k=0;
615*10167291SKenneth E. Jansen         if(workfc.numpe > 1) {
616*10167291SKenneth E. Jansen           for (i=0; i<nshg ; i++) {
617*10167291SKenneth E. Jansen             nodetoinsert = fncorp[i]-1;
618*10167291SKenneth E. Jansen             indexsetarys[k] = nodetoinsert;
619*10167291SKenneth E. Jansen             k = k+1;
620*10167291SKenneth E. Jansen           }
621*10167291SKenneth E. Jansen         }
622*10167291SKenneth E. Jansen         else {
623*10167291SKenneth E. Jansen           for (i=0; i<nshg ; i++) {
624*10167291SKenneth E. Jansen             nodetoinsert = i;
625*10167291SKenneth E. Jansen             indexsetarys[k] = nodetoinsert;
626*10167291SKenneth E. Jansen             k = k+1;
627*10167291SKenneth E. Jansen           }
628*10167291SKenneth E. Jansen         }
629*10167291SKenneth E. Jansen 
630*10167291SKenneth E. Jansen         for (i=0; i<nshg; i++) {
631*10167291SKenneth E. Jansen           gblindexsetarys[i] = i ; //TODO: better option for performance?
632*10167291SKenneth E. Jansen         }
633*10167291SKenneth E. Jansen //  Create Vector Index Maps
634*10167291SKenneth E. Jansen         petsc_n  = (PetscInt) nshg;
635*10167291SKenneth E. Jansen         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys,
636*10167291SKenneth E. Jansen      	       PETSC_COPY_VALUES, &LocalIndexSets);
637*10167291SKenneth E. Jansen         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, gblindexsetarys,
638*10167291SKenneth E. Jansen                PETSC_COPY_VALUES, &GlobalIndexSets);
639*10167291SKenneth E. Jansen         ierr = ISLocalToGlobalMappingCreateIS(LocalIndexSets,&VectorMappings);
640*10167291SKenneth E. Jansen         ierr =  ISLocalToGlobalMappingCreateIS(GlobalIndexSets,&GblVectorMappings);
641*10167291SKenneth E. Jansen       }
642*10167291SKenneth E. Jansen       if(genpar.lhs ==1) {
643*10167291SKenneth E. Jansen       ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY);
644*10167291SKenneth E. Jansen       ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY);
645*10167291SKenneth E. Jansen       }
646*10167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
647*10167291SKenneth E. Jansen         ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol);
648*10167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs);
649*10167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs);
650*10167291SKenneth E. Jansen       }
651*10167291SKenneth E. Jansen       ierr = VecZeroEntries(resPs);
652*10167291SKenneth E. Jansen       ierr = VecZeroEntries(DyPs);
653*10167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
654*10167291SKenneth E. Jansen         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPGlobals);
655*10167291SKenneth E. Jansen         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals);
656*10167291SKenneth E. Jansen       }
657*10167291SKenneth E. Jansen         ierr = VecZeroEntries(resPGlobals);
658*10167291SKenneth E. Jansen 
659*10167291SKenneth E. Jansen       PetscRow=0;
660*10167291SKenneth E. Jansen       k = 0;
661*10167291SKenneth E. Jansen       int index;
662*10167291SKenneth E. Jansen       for (i=0; i<nshg; i++ ){
663*10167291SKenneth E. Jansen           valtoinsert = res[i];
664*10167291SKenneth E. Jansen           if(workfc.numpe > 1) {
665*10167291SKenneth E. Jansen             PetscRow = (fncorp[i]-1);
666*10167291SKenneth E. Jansen           }
667*10167291SKenneth E. Jansen           else {
668*10167291SKenneth E. Jansen             PetscRow = i-1;
669*10167291SKenneth E. Jansen           }
670*10167291SKenneth E. Jansen           assert(fncorp[i]<=nshgt);
671*10167291SKenneth E. Jansen           assert(fncorp[i]>0);
672*10167291SKenneth E. Jansen           assert(PetscRow>=0);
673*10167291SKenneth E. Jansen           assert(PetscRow<=nshgt);
674*10167291SKenneth E. Jansen           ierr =  VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES);
675*10167291SKenneth E. Jansen       }
676*10167291SKenneth E. Jansen //      printf("after VecSetValue loop %d\n",ierr);
677*10167291SKenneth E. Jansen       ierr = VecAssemblyBegin(resPs);
678*10167291SKenneth E. Jansen       ierr = VecAssemblyEnd(resPs);
679*10167291SKenneth E. Jansen 
680*10167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
681*10167291SKenneth E. Jansen         ierr = KSPCreate(PETSC_COMM_WORLD, &ksps);
682*10167291SKenneth E. Jansen         ierr = KSPSetOperators(ksps, lhsPs, lhsPs);
683*10167291SKenneth E. Jansen //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN);
684*10167291SKenneth E. Jansen         ierr = KSPGetPC(ksps, &pcs);
685*10167291SKenneth E. Jansen         ierr = PCSetType(pcs, PCPBJACOBI);
686*10167291SKenneth E. Jansen         PetscInt maxits;
687*10167291SKenneth E. Jansen         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
688*10167291SKenneth E. Jansen         //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace);
689*10167291SKenneth E. Jansen         ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
690*10167291SKenneth E. Jansen         ierr = KSPSetFromOptions(ksps);
691*10167291SKenneth E. Jansen       }
692*10167291SKenneth E. Jansen       ierr = KSPSolve(ksps, resPs, DyPs);
693*10167291SKenneth E. Jansen       if(firstpetsccalls == 1) {
694*10167291SKenneth E. Jansen       ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, GlobalIndexSets, &scatter7s);
695*10167291SKenneth E. Jansen       }
696*10167291SKenneth E. Jansen       ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
697*10167291SKenneth E. Jansen       ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
698*10167291SKenneth E. Jansen       ierr = VecGetArray(DyPLocals, &parray);
699*10167291SKenneth E. Jansen       PetscRow = 0;
700*10167291SKenneth E. Jansen       for ( node = 0; node< nshg; node++) {
701*10167291SKenneth E. Jansen           index = node;
702*10167291SKenneth E. Jansen           Dy[index] = parray[PetscRow];
703*10167291SKenneth E. Jansen           PetscRow = PetscRow+1;
704*10167291SKenneth E. Jansen       }
705*10167291SKenneth E. Jansen       ierr = VecRestoreArray(DyPLocals, &parray);
706*10167291SKenneth E. Jansen 
707*10167291SKenneth E. Jansen       firstpetsccalls = 0;
708*10167291SKenneth E. Jansen 
709*10167291SKenneth E. Jansen // .... output the statistics
710*10167291SKenneth E. Jansen //
711*10167291SKenneth E. Jansen //      itrpar.iKss=0; // see rstat()
712*10167291SKenneth E. Jansen       PetscInt its;
713*10167291SKenneth E. Jansen       ierr = KSPGetIterationNumber(ksps, &its);
714*10167291SKenneth E. Jansen       itrpar.iKss = (int) its;
715*10167291SKenneth E. Jansen       itrpar.ntotGMs += (int) its;
716*10167291SKenneth E. Jansen       rstatSclr (res, ilwork);
717*10167291SKenneth E. Jansen //
718*10167291SKenneth E. Jansen // .... end
719*10167291SKenneth E. Jansen //
720*10167291SKenneth E. Jansen }
721*10167291SKenneth E. Jansen 
722