xref: /phasta/phSolver/compressible/solgmrpetsc.c (revision 8ae99c5947842fec69391f79c89a4e29f1641d02)
1 #ifdef HAVE_PETSC
2 #include <stdio.h>
3 
4 #include "petscsys.h"
5 #include "petscvec.h"
6 #include "petscmat.h"
7 #include "petscpc.h"
8 #include "petscksp.h"
9 
10 #include "assert.h"
11 #include "common_c.h"
12 
13 #include <FCMangle.h>
14 #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP)
15 #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR)
16 #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC)
17 #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR)
18 #define rstat FortranCInterface_GLOBAL_(rstat, RSTAT)
19 #define rstatSclr FortranCInterface_GLOBAL_(rstatsclr, RSTATSCLR)
20 #define min(a,b) (((a) < (b)) ? (a) : (b))
21 #define max(a,b) (((a) > (b)) ? (a) : (b))
22 #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME)
23 #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF)
24 
25 typedef long long int gcorp_t;
26 
27 void get_time(uint64_t* rv, uint64_t* cycle);
28 void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl);
29 
30 //#include "auxmpi.h"
31 //      static PetscOffset poff;
32 
33       static Mat lhsP;
34       static PC pc;
35       static KSP ksp;
36       static Vec DyP, resP, DyPLocal;
37       static PetscErrorCode ierr;
38       static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol;
39       static IS LocalIndexSet;
40       static ISLocalToGlobalMapping VectorMapping;
41       static  VecScatter scatter7;
42       static int firstpetsccall = 1;
43 
44       static Mat lhsPs;
45       static PC pcs;
46       static KSP ksps;
47       static Vec DyPs, resPs, DyPLocals;
48       static IS LocalIndexSets;
49       static ISLocalToGlobalMapping VectorMappings;
50       static VecScatter scatter7s;
51       static int firstpetsccalls = 1;
52 
53       static int rankdump=-1;  // 8121 was the problem rank with 3.5.3
54 
55 
56 void     SolGMRp(double* y,         double* ac,        double* yold,
57      	double* x,         int* iBC,       double* BC,
58      	int* col,       int* row,       double* lhsk,
59      	double* res,       double* BDiag,
60      	int* iper,
61      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
62      	double* shglb,     double* Dy,        double* rerr, long long int* fncorp)
63 {
64 //
65 // ----------------------------------------------------------------------
66 //
67 //  This is the preconditioned GMRES driver routine.
68 //
69 // input:
70 //  y      (nshg,ndof)           : Y-variables at n+alpha_v
71 //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
72 //  yold   (nshg,ndof)           : Y-variables at beginning of step
73 //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
74 //  x      (numnp,nsd)            : node coordinates
75 //  iBC    (nshg)                : BC codes
76 //  BC     (nshg,ndofBC)         : BC constraint parameters
77 //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
78 //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
79 //
80 // output:
81 //  res    (nshg,nflow)           : preconditioned residual
82 //  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
83 //
84 //
85 // Zdenek Johan,  Winter 1991.  (Fortran 90)
86 // ----------------------------------------------------------------------
87 //
88 
89 
90 // Get variables from common_c.h
91       int nshg, nflow, nsd, iownnodes;
92       nshg  = conpar.nshg;
93       nflow = conpar.nflow;
94       nsd = NSD;
95       iownnodes = conpar.iownnodes;
96 
97       gcorp_t nshgt;
98       gcorp_t mbeg;
99       gcorp_t mend;
100       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
101       mbeg = (gcorp_t) newdim.minowned;
102       mend = (gcorp_t) newdim.maxowned;
103 
104 
105       int node, element, var, eqn;
106       double valtoinsert;
107       int nenl, iel, lelCat, lcsyst, iorder, nshl;
108       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
109       double DyFlat[nshg*nflow];
110       double DyFlatPhasta[nshg*nflow];
111       double rmes[nshg*nflow];
112 // DEBUG
113       int i,j,k,l,m;
114 
115       // FIXME: PetscScalar
116       double  real_rtol, real_abstol, real_dtol;
117 // /DEBUG
118       //double parray[1]; // Should be a PetscScalar
119       double *parray; // Should be a PetscScalar
120       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
121       PetscInt petsc_n;
122       PetscOne = 1;
123       uint64_t duration[4];
124 
125       gcorp_t glbNZ;
126 
127       if(firstpetsccall == 1) {
128 //Everthing in this conditional block should be moved to a function to estimate the size of PETSc's matrix which improves time on the first matrix fill
129 //
130         PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
131         PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
132         for(i=0;i<iownnodes;i++) {
133              idiagnz[i]=0;
134              iodiagnz[i]=0;
135         }
136         i=0;
137         for(k=0;k<nshg;k++) {
138           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
139 // this node is not owned by this rank so we skip
140           } else {
141            for(j=col[i]-1;j<col[i+1]-1;j++) {
142 //          assert(row[j]<=nshg);
143 //          assert(fncorp[row[j]-1]<=nshgt);
144              glbNZ=fncorp[row[j]-1];
145              if((glbNZ < mbeg) || (glbNZ > mend)) {
146                 iodiagnz[i]++;
147              } else {
148                 idiagnz[i]++;
149              }
150            }
151            i++;
152           }
153         }
154         gcorp_t mind=1000;
155         gcorp_t mino=1000;
156         gcorp_t maxd=0;
157         gcorp_t maxo=0;
158         for(i=0;i<iownnodes;i++) {
159            mind=min(mind,idiagnz[i]);
160            mino=min(mino,iodiagnz[i]);
161            maxd=max(maxd,idiagnz[i]);
162            maxo=max(maxo,iodiagnz[i]);
163 //           iodiagnz[i]=max(iodiagnz[i],10);
164 //           idiagnz[i]=max(idiagnz[i],10);
165 //           iodiagnz[i]=2*iodiagnz[i];   //  estimate a bit higher for off-part interactions
166 //           idiagnz[i]=2*idiagnz[i];   //  estimate a bit higher for off-part interactions
167         }
168 // the above was pretty good but below is faster and not too much more memory...of course once you do this
169 // could just use the constant fill parameters in create but keep it alive for potential later optimization
170 
171         for(i=0;i<iownnodes;i++) {
172            iodiagnz[i]=1.3*maxd;
173            idiagnz[i]=1.3*maxd;
174         }
175 
176 
177 
178         if(workfc.numpe < 200){
179           printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
180           printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo);
181         }
182         // Print debug info
183         if(nshgt < 200){
184           int irank;
185           for(irank=0;irank<workfc.numpe;irank++) {
186             if(irank == workfc.myrank){
187               printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank);
188               for(i=0;i<iownnodes;i++) {
189                 printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]);
190               }
191             }
192            MPI_Barrier(MPI_COMM_WORLD);
193           }
194         }
195         petsc_bs = (PetscInt) nflow;
196         petsc_m  = (PetscInt) nflow* (PetscInt) iownnodes;
197         petsc_M  = (PetscInt) nshgt * (PetscInt) nflow;
198         petsc_PA  = (PetscInt) 40;
199         ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M,
200                              0, idiagnz, 0, iodiagnz, &lhsP);
201 
202         ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
203 
204 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
205 #ifdef JEDBROWN
206         ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
207 #endif
208         ierr = MatSetUp(lhsP);
209 
210       PetscInt myMatStart, myMatEnd;
211       ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
212 //debug
213       if(workfc.myrank == rankdump) printf("Flow myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd);
214       }
215       if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP);
216 
217 //
218 // .... *******************>> Element Data Formation <<******************
219 //
220 //
221 // .... set the parameters for flux and surface tension calculations
222 //
223 //
224       genpar.idflx = 0 ;
225       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
226       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
227 
228       get_time(duration, (duration+1));
229 
230       ElmGMRPETSc(y,             ac,            x,
231                   shp,           shgl,          iBC,
232                   BC,            shpb,
233                   shglb,         res,
234                   rmes,                   iper,
235                   ilwork,        rerr ,         &lhsP);
236 
237       get_time((duration+2), (duration+3));
238       get_max_time_diff((duration), (duration+2),
239                         (duration+1), (duration+3),
240                         "ElmGMRPETSc \0");  // char(0))
241        if(firstpetsccall == 1) {
242       // Setup IndexSet. For now, we mimic vector insertion procedure
243       // Since we always reference by global indexes this doesn't matter
244       // except for cache performance)
245       // TODO: Better arrangment?
246       PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg);
247       PetscInt nodetoinsert;
248         nodetoinsert = 0;
249         k=0;
250 //debug
251          if(workfc.myrank == rankdump) {
252              printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
253              printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend);
254            }
255         if(workfc.numpe > 1) {
256           for (i=0; i<nshg ; i++) {
257             nodetoinsert = fncorp[i]-1;
258 //debug
259          if(workfc.myrank == rankdump) {
260              printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert);
261          }
262 
263 //            assert(fncorp[i]>=0);
264             for (j=1; j<=nflow; j++) {
265               indexsetary[k] = nodetoinsert*nflow+(j-1);
266               assert(indexsetary[k]>=0);
267 //              assert(fncorp[i]>=0);
268               k = k+1;
269             }
270           }
271         }
272         else {
273           for (i=0; i<nshg ; i++) {
274             nodetoinsert = i;
275             for (j=1; j<=nflow; j++) {
276               indexsetary[k] = nodetoinsert*nflow+(j-1);
277               k = k+1;
278             }
279           }
280         }
281 
282 //  Create Vector Index Maps
283         petsc_n  = (PetscInt) nshg * (PetscInt) nflow;
284         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary,
285      	       PETSC_COPY_VALUES, &LocalIndexSet);
286       free(indexsetary);
287       }
288       if(genpar.lhs == 1) {
289       get_time((duration), (duration+1));
290       ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY);
291       ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY);
292       get_time((duration+2),(duration+3));
293       get_max_time_diff((duration), (duration+2),
294                         (duration+1), (duration+3),
295                         "MatAssembly \0"); // char(0))
296       get_time(duration, (duration+1));
297       }
298       if(firstpetsccall == 1) {
299         ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol);
300         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP);
301         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP);
302       }
303       ierr = VecZeroEntries(resP);
304       ierr = VecZeroEntries(DyP);
305       if(firstpetsccall == 1) {
306         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal);
307       }
308 
309       PetscRow=0;
310       k = 0;
311       int index;
312       for (i=0; i<nshg; i++ ){
313         for (j = 1; j<=nflow; j++){
314           index = i + (j-1)*nshg;
315           valtoinsert = res[index];
316           if(workfc.numpe > 1) {
317             PetscRow = (fncorp[i]-1)*nflow+(j-1);
318           }
319           else {
320             PetscRow = i*nflow+(j-1);
321           }
322           assert(fncorp[i]<=nshgt);
323           assert(fncorp[i]>0);
324           assert(PetscRow>=0);
325           assert(PetscRow<=nshgt*nflow);
326           ierr =  VecSetValue(resP, PetscRow, valtoinsert, INSERT_VALUES);
327         }
328       }
329       ierr = VecAssemblyBegin(resP);
330       ierr = VecAssemblyEnd(resP);
331       get_time((duration+2), (duration+3));
332       get_max_time_diff((duration), (duration+2),
333                         (duration+1), (duration+3),
334                         "VectorWorkPre \0"); // char(0))
335 
336       get_time((duration),(duration+1));
337 
338       if(firstpetsccall == 1) {
339         ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
340         ierr = KSPSetOperators(ksp, lhsP, lhsP);
341         ierr = KSPGetPC(ksp, &pc);
342         ierr = PCSetType(pc, PCPBJACOBI);
343         PetscInt maxits;
344         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
345         ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
346         ierr = KSPSetFromOptions(ksp);
347       }
348       ierr = KSPSolve(ksp, resP, DyP);
349       get_time((duration+2),(duration+3));
350       get_max_time_diff((duration), (duration+2),
351                         (duration+1), (duration+3),
352                         "KSPSolve \0"); // char(0))
353       get_time((duration),(duration+1));
354       if(firstpetsccall == 1) {
355       ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, NULL, &scatter7);
356       }
357       ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
358       ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
359       ierr = VecGetArray(DyPLocal, &parray);
360       PetscRow = 0;
361       for ( node = 0; node< nshg; node++) {
362         for (var = 1; var<= nflow; var++) {
363           index = node + (var-1)*nshg;
364           Dy[index] = parray[PetscRow];
365           PetscRow = PetscRow+1;
366         }
367       }
368       ierr = VecRestoreArray(DyPLocal, &parray);
369 
370       firstpetsccall = 0;
371 
372 // .... output the statistics
373 //
374       itrpar.iKs=0; // see rstat()
375       PetscInt its;
376       ierr = KSPGetIterationNumber(ksp, &its);
377       itrpar.iKs = (int) its;
378       get_time((duration+2),(duration+3));
379       get_max_time_diff((duration), (duration+2),
380                         (duration+1), (duration+3),
381                         "solWork \0"); // char(0))
382       itrpar.ntotGM += (int) its;
383       rstat (res, ilwork);
384 //
385 // .... end
386 //
387 }
388 
389 void     SolGMRpSclr(double* y,         double* ac,
390      	double* x,      double* elDw,   int* iBC,       double* BC,
391      	int* col,       int* row,
392      	int* iper,
393      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
394      	double* shglb,  double* res,   double* Dy,     long long int* fncorp)
395 {
396 //
397 // ----------------------------------------------------------------------
398 //
399 //  This is the preconditioned GMRES driver routine.
400 //
401 // input:
402 //  y      (nshg,ndof)           : Y-variables at n+alpha_v
403 //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
404 //  yold   (nshg,ndof)           : Y-variables at beginning of step
405 //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
406 //  x      (numnp,nsd)            : node coordinates
407 //  iBC    (nshg)                : BC codes
408 //  BC     (nshg,ndofBC)         : BC constraint parameters
409 //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
410 //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
411 //
412 // ----------------------------------------------------------------------
413 //
414 
415 
416 // Get variables from common_c.h
417       int nshg, nflow, nsd, iownnodes;
418       nshg  = conpar.nshg;
419       nsd = NSD;
420       iownnodes = conpar.iownnodes;
421 
422       gcorp_t nshgt;
423       gcorp_t mbeg;
424       gcorp_t mend;
425       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
426       mbeg = (gcorp_t) newdim.minowned;
427       mend = (gcorp_t) newdim.maxowned;
428 
429 
430       int node, element, var, eqn;
431       double valtoinsert;
432       int nenl, iel, lelCat, lcsyst, iorder, nshl;
433       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
434       double DyFlats[nshg];
435       double DyFlatPhastas[nshg];
436       double rmes[nshg];
437 // DEBUG
438       int i,j,k,l,m;
439 
440       double  real_rtol, real_abstol, real_dtol;
441       double *parray;
442       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
443       PetscInt petsc_n;
444       PetscOne = 1;
445       uint64_t duration[4];
446 
447 
448 //
449 //
450 // .... *******************>> Element Data Formation <<******************
451 //
452 //
453 // .... set the parameters for flux and surface tension calculations
454 //
455 //
456       genpar.idflx = 0 ;
457       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
458       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
459 
460 
461 
462       gcorp_t glbNZ;
463 
464       if(firstpetsccalls == 1) {
465         PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
466         PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
467         for(i=0;i<iownnodes;i++) {
468              idiagnz[i]=0;
469              iodiagnz[i]=0;
470         }
471         i=0;
472         for(k=0;k<nshg;k++) {
473           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
474 // this node is not owned by this rank so we skip
475           } else {
476            for(j=col[i]-1;j<col[i+1]-1;j++) {
477              glbNZ=fncorp[row[j]-1];
478              if((glbNZ < mbeg) || (glbNZ > mend)) {
479                 iodiagnz[i]++;
480              } else {
481                 idiagnz[i]++;
482              }
483            }
484            i++;
485           }
486         }
487         gcorp_t mind=1000;
488         gcorp_t mino=1000;
489         gcorp_t maxd=0;
490         gcorp_t maxo=0;
491         for(i=0;i<iownnodes;i++) {
492            mind=min(mind,idiagnz[i]);
493            mino=min(mino,iodiagnz[i]);
494            maxd=max(maxd,idiagnz[i]);
495            maxo=max(maxo,iodiagnz[i]);
496         }
497 
498         for(i=0;i<iownnodes;i++) {
499            iodiagnz[i]=1.3*maxd;
500            idiagnz[i]=1.3*maxd;
501         }
502 
503 
504 
505 //       }
506 //       if(firstpetsccalls == 1) {
507 
508         petsc_m  = (PetscInt) iownnodes;
509         petsc_M  = (PetscInt) nshgt;
510         petsc_PA  = (PetscInt) 40;
511 
512         ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M,
513                             0, idiagnz, 0, iodiagnz, &lhsPs);
514 
515         ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
516 
517 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
518 #ifdef HIDEJEDBROWN
519         ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
520 #endif
521         ierr = MatSetUp(lhsPs);
522 
523       PetscInt myMatStart, myMatEnd;
524       ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd);
525       if(workfc.myrank ==rankdump) printf("Sclr myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd);
526       }
527 //      MPI_Barrier(MPI_COMM_WORLD);
528  //     if(workfc.myrank ==0) printf("Before MatZeroEntries  \n");
529       if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs);
530 
531       get_time(duration, (duration+1));
532 
533 //      MPI_Barrier(MPI_COMM_WORLD);
534  //     if(workfc.myrank ==0) printf("Before elmgmr  \n");
535 //      for (i=0; i<nshg ; i++) {
536 //            assert(fncorp[i]>0);
537 //      }
538 
539       ElmGMRPETScSclr(y,             ac,            x, elDw,
540                   shp,           shgl,          iBC,
541                   BC,            shpb,
542                   shglb,         res,
543                   rmes,                   iper,
544                   ilwork,        &lhsPs);
545        if(firstpetsccalls == 1) {
546       // Setup IndexSet. For now, we mimic vector insertion procedure
547       // Since we always reference by global indexes this doesn't matter
548       // except for cache performance)
549       // TODO: Better arrangment?
550 
551       PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg);
552       PetscInt nodetoinsert;
553 
554         nodetoinsert = 0;
555         k=0;
556 
557 //debug
558          if(workfc.myrank == rankdump) {
559              printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
560              printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend);
561            }
562 
563         if(workfc.numpe > 1) {
564           for (i=0; i<nshg ; i++) {
565             nodetoinsert = fncorp[i]-1;
566 //debug
567          if(workfc.myrank == rankdump) {
568              printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert);
569          }
570 
571             indexsetarys[k] = nodetoinsert;
572             k = k+1;
573           }
574         }
575         else {
576           for (i=0; i<nshg ; i++) {
577             nodetoinsert = i;
578             indexsetarys[k] = nodetoinsert;
579             k = k+1;
580           }
581         }
582 
583 //  Create Vector Index Maps
584         petsc_n  = (PetscInt) nshg;
585         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys,
586      	       PETSC_COPY_VALUES, &LocalIndexSets);
587       free(indexsetarys);
588       }
589       if(genpar.lhs ==1) {
590       ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY);
591       ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY);
592       }
593       if(firstpetsccalls == 1) {
594         ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol);
595         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs);
596         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs);
597       }
598       ierr = VecZeroEntries(resPs);
599       ierr = VecZeroEntries(DyPs);
600       if(firstpetsccalls == 1) {
601         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals);
602       }
603 
604       PetscRow=0;
605       k = 0;
606       int index;
607       for (i=0; i<nshg; i++ ){
608           valtoinsert = res[i];
609           if(workfc.numpe > 1) {
610             PetscRow = (fncorp[i]-1);
611           }
612           else {
613             PetscRow = i-1;
614           }
615           assert(fncorp[i]<=nshgt);
616           assert(fncorp[i]>0);
617           assert(PetscRow>=0);
618           assert(PetscRow<=nshgt);
619           ierr =  VecSetValue(resPs, PetscRow, valtoinsert, INSERT_VALUES);
620       }
621 //      printf("after VecSetValue loop %d\n",ierr);
622       ierr = VecAssemblyBegin(resPs);
623       ierr = VecAssemblyEnd(resPs);
624 
625       if(firstpetsccalls == 1) {
626         ierr = KSPCreate(PETSC_COMM_WORLD, &ksps);
627         ierr = KSPSetOperators(ksps, lhsPs, lhsPs);
628 //3.4 ierr = KSPSetOperators(ksps, lhsPs, lhsPs, SAME_NONZERO_PATTERN);
629         ierr = KSPGetPC(ksps, &pcs);
630         ierr = PCSetType(pcs, PCPBJACOBI);
631         PetscInt maxits;
632         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
633         //ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, solpar.nGMRES*solpar.Kspace);
634         ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
635         ierr = KSPSetFromOptions(ksps);
636       }
637       ierr = KSPSolve(ksps, resPs, DyPs);
638       if(firstpetsccalls == 1) {
639       ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, NULL, &scatter7s);
640       }
641       ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
642       ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
643       ierr = VecGetArray(DyPLocals, &parray);
644       PetscRow = 0;
645       for ( node = 0; node< nshg; node++) {
646           index = node;
647           Dy[index] = parray[PetscRow];
648           PetscRow = PetscRow+1;
649       }
650       ierr = VecRestoreArray(DyPLocals, &parray);
651 
652       firstpetsccalls = 0;
653 
654 // .... output the statistics
655 //
656 //      itrpar.iKss=0; // see rstat()
657       PetscInt its;
658       ierr = KSPGetIterationNumber(ksps, &its);
659       itrpar.iKss = (int) its;
660       itrpar.ntotGMs += (int) its;
661       rstatSclr (res, ilwork);
662 //
663 // .... end
664 //
665 }
666 #endif
667