xref: /phasta/phSolver/compressible/solgmrpetsc.c (revision 712d3df0b59ebebaaeaea358162c8d2c043c6e08)
1 #ifdef HAVE_PETSC
2 #include <stdio.h>
3 
4 #include "petscsys.h"
5 #include "petscvec.h"
6 #include "petscmat.h"
7 #include "petscpc.h"
8 #include "petscksp.h"
9 
10 #include "assert.h"
11 #include "common_c.h"
12 
13 #include <FCMangle.h>
14 #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP)
15 #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR)
16 #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC)
17 #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR)
18 #define rstatp FortranCInterface_GLOBAL_(rstatp, RSTATP)
19 #define rstatpSclr FortranCInterface_GLOBAL_(rstatpsclr, RSTATPSCLR)
20 #define min(a,b) (((a) < (b)) ? (a) : (b))
21 #define max(a,b) (((a) > (b)) ? (a) : (b))
22 #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME)
23 #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF)
24 
25 typedef long long int gcorp_t;
26 
27 void get_time(uint64_t* rv, uint64_t* cycle);
28 void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl);
29 
30 //#include "auxmpi.h"
31 //      static PetscOffset poff;
32 
33       static Mat lhsP;
34       static PC pc;
35       static KSP ksp;
36       static Vec DyP, resP, DyPLocal;
37       static PetscErrorCode ierr;
38       static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol;
39       static IS LocalIndexSet;
40       static ISLocalToGlobalMapping VectorMapping;
41       static  VecScatter scatter7;
42       static int firstpetsccall = 1;
43 
44       static Mat lhsPs;
45       static PC pcs;
46       static KSP ksps;
47       static Vec DyPs, resPs, DyPLocals;
48       static IS LocalIndexSets;
49       static ISLocalToGlobalMapping VectorMappings;
50       static VecScatter scatter7s;
51       static int firstpetsccalls = 1;
52 
53       static int rankdump=-1;  // 8121 was the problem rank with 3.5.3
54       PetscReal resNrm;
55 
56 
57 void     SolGMRp(double* y,         double* ac,        double* yold,
58      	double* x,         int* iBC,       double* BC,
59      	int* col,       int* row,       double* lhsk,
60      	double* res,       double* BDiag,
61      	int* iper,
62      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
63      	double* shglb,     double* Dy,        double* rerr, long long int* fncorp)
64 {
65 //
66 // ----------------------------------------------------------------------
67 //
68 //  This is the preconditioned GMRES driver routine.
69 //
70 // input:
71 //  y      (nshg,ndof)           : Y-variables at n+alpha_v
72 //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
73 //  yold   (nshg,ndof)           : Y-variables at beginning of step
74 //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
75 //  x      (numnp,nsd)            : node coordinates
76 //  iBC    (nshg)                : BC codes
77 //  BC     (nshg,ndofBC)         : BC constraint parameters
78 //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
79 //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
80 //
81 // output:
82 //  res    (nshg,nflow)           : preconditioned residual
83 //  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
84 //
85 //
86 // Zdenek Johan,  Winter 1991.  (Fortran 90)
87 // ----------------------------------------------------------------------
88 //
89 
90 
91 // Get variables from common_c.h
92       int nshg, nflow, nsd, iownnodes;
93       nshg  = conpar.nshg;
94       nflow = conpar.nflow;
95       nsd = NSD;
96       iownnodes = conpar.iownnodes;
97 
98       gcorp_t nshgt;
99       gcorp_t mbeg;
100       gcorp_t mend;
101       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
102       mbeg = (gcorp_t) newdim.minowned;
103       mend = (gcorp_t) newdim.maxowned;
104 
105 
106       int node, element, var, eqn;
107       double valtoinsert;
108       int nenl, iel, lelCat, lcsyst, iorder, nshl;
109       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
110       double DyFlat[nshg*nflow];
111       double DyFlatPhasta[nshg*nflow];
112       double rmes[nshg*nflow];
113 // DEBUG
114       int i,j,k,l,m;
115 
116       // FIXME: PetscScalar
117       double  real_rtol, real_abstol, real_dtol;
118 // /DEBUG
119       //double parray[1]; // Should be a PetscScalar
120       double *parray; // Should be a PetscScalar
121       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
122       PetscInt petsc_n;
123       PetscOne = 1;
124       uint64_t duration[4];
125 
126       gcorp_t glbNZ;
127 
128       if(firstpetsccall == 1) {
129 //Everthing in this conditional block should be moved to a function to estimate the size of PETSc's matrix which improves time on the first matrix fill
130 //
131         PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
132         PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
133         for(i=0;i<iownnodes;i++) {
134              idiagnz[i]=0;
135              iodiagnz[i]=0;
136         }
137         i=0;
138         for(k=0;k<nshg;k++) {
139           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
140 // this node is not owned by this rank so we skip
141           } else {
142            for(j=col[i]-1;j<col[i+1]-1;j++) {
143 //          assert(row[j]<=nshg);
144 //          assert(fncorp[row[j]-1]<=nshgt);
145              glbNZ=fncorp[row[j]-1];
146              if((glbNZ < mbeg) || (glbNZ > mend)) {
147                 iodiagnz[i]++;
148              } else {
149                 idiagnz[i]++;
150              }
151            }
152            i++;
153           }
154         }
155         gcorp_t mind=1000;
156         gcorp_t mino=1000;
157         gcorp_t maxd=0;
158         gcorp_t maxo=0;
159         for(i=0;i<iownnodes;i++) {
160            mind=min(mind,idiagnz[i]);
161            mino=min(mino,iodiagnz[i]);
162            maxd=max(maxd,idiagnz[i]);
163            maxo=max(maxo,iodiagnz[i]);
164 //           iodiagnz[i]=max(iodiagnz[i],10);
165 //           idiagnz[i]=max(idiagnz[i],10);
166 //           iodiagnz[i]=2*iodiagnz[i];   //  estimate a bit higher for off-part interactions
167 //           idiagnz[i]=2*idiagnz[i];   //  estimate a bit higher for off-part interactions
168         }
169 // the above was pretty good but below is faster and not too much more memory...of course once you do this
170 // could just use the constant fill parameters in create but keep it alive for potential later optimization
171 
172         for(i=0;i<iownnodes;i++) {
173            iodiagnz[i]=1.3*maxd;
174            idiagnz[i]=1.3*maxd;
175         }
176 
177 
178 
179         if(workfc.numpe < 200){
180           printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
181           printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo);
182         }
183         // Print debug info
184         if(nshgt < 200){
185           int irank;
186           for(irank=0;irank<workfc.numpe;irank++) {
187             if(irank == workfc.myrank){
188               printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank);
189               for(i=0;i<iownnodes;i++) {
190                 printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]);
191               }
192             }
193            MPI_Barrier(MPI_COMM_WORLD);
194           }
195         }
196         petsc_bs = (PetscInt) nflow;
197         petsc_m  = (PetscInt) nflow* (PetscInt) iownnodes;
198         petsc_M  = (PetscInt) nshgt * (PetscInt) nflow;
199         petsc_PA  = (PetscInt) 40;
200         ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M,
201                              0, idiagnz, 0, iodiagnz, &lhsP);
202 
203         ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
204 
205 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
206 #ifdef JEDBROWN
207         ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
208 #endif
209         ierr = MatSetUp(lhsP);
210 
211       PetscInt myMatStart, myMatEnd;
212       ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
213 //debug
214       if(workfc.myrank == rankdump) printf("Flow myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd);
215       }
216       if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP);
217 
218 //
219 // .... *******************>> Element Data Formation <<******************
220 //
221 //
222 // .... set the parameters for flux and surface tension calculations
223 //
224 //
225       genpar.idflx = 0 ;
226       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
227       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
228 
229       get_time(duration, (duration+1));
230 
231       ElmGMRPETSc(y,             ac,            x,
232                   shp,           shgl,          iBC,
233                   BC,            shpb,
234                   shglb,         res,
235                   rmes,                   iper,
236                   ilwork,        rerr ,         &lhsP);
237 
238       get_time((duration+2), (duration+3));
239       get_max_time_diff((duration), (duration+2),
240                         (duration+1), (duration+3),
241                         "ElmGMRPETSc \0");  // char(0))
242        if(firstpetsccall == 1) {
243       // Setup IndexSet. For now, we mimic vector insertion procedure
244       // Since we always reference by global indexes this doesn't matter
245       // except for cache performance)
246       // TODO: Better arrangment?
247       PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg);
248       PetscInt nodetoinsert;
249         nodetoinsert = 0;
250         k=0;
251 //debug
252          if(workfc.myrank == rankdump) {
253              printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
254              printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend);
255            }
256         if(workfc.numpe > 1) {
257           for (i=0; i<nshg ; i++) {
258             nodetoinsert = fncorp[i]-1;
259 //debug
260          if(workfc.myrank == rankdump) {
261              printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert);
262          }
263 
264 //            assert(fncorp[i]>=0);
265             for (j=1; j<=nflow; j++) {
266               indexsetary[k] = nodetoinsert*nflow+(j-1);
267               assert(indexsetary[k]>=0);
268 //              assert(fncorp[i]>=0);
269               k = k+1;
270             }
271           }
272         }
273         else {
274           for (i=0; i<nshg ; i++) {
275             nodetoinsert = i;
276             for (j=1; j<=nflow; j++) {
277               indexsetary[k] = nodetoinsert*nflow+(j-1);
278               k = k+1;
279             }
280           }
281         }
282 
283 //  Create Vector Index Maps
284         petsc_n  = (PetscInt) nshg * (PetscInt) nflow;
285         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary,
286      	       PETSC_COPY_VALUES, &LocalIndexSet);
287       free(indexsetary);
288       }
289       if(genpar.lhs == 1) {
290       get_time((duration), (duration+1));
291       ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY);
292       ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY);
293       get_time((duration+2),(duration+3));
294       get_max_time_diff((duration), (duration+2),
295                         (duration+1), (duration+3),
296                         "MatAssembly \0"); // char(0))
297       get_time(duration, (duration+1));
298       }
299       if(firstpetsccall == 1) {
300         ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol);
301         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP);
302         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP);
303       }
304       ierr = VecZeroEntries(resP);
305       ierr = VecZeroEntries(DyP);
306       if(firstpetsccall == 1) {
307         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal);
308       }
309 
310       PetscRow=0;
311       k = 0;
312       int index;
313       for (i=0; i<nshg; i++ ){
314         for (j = 1; j<=nflow; j++){
315           index = i + (j-1)*nshg;
316           valtoinsert = res[index];
317           if(workfc.numpe > 1) {
318             PetscRow = (fncorp[i]-1)*nflow+(j-1);
319           }
320           else {
321             PetscRow = i*nflow+(j-1);
322           }
323           assert(fncorp[i]<=nshgt);
324           assert(fncorp[i]>0);
325           assert(PetscRow>=0);
326           assert(PetscRow<=nshgt*nflow);
327           ierr =  VecSetValue(resP, PetscRow, valtoinsert, ADD_VALUES);
328         }
329       }
330       ierr = VecAssemblyBegin(resP);
331       ierr = VecAssemblyEnd(resP);
332       ierr = VecNorm(resP,NORM_2,&resNrm);
333       get_time((duration+2), (duration+3));
334       get_max_time_diff((duration), (duration+2),
335                         (duration+1), (duration+3),
336                         "VectorWorkPre \0"); // char(0))
337 
338       get_time((duration),(duration+1));
339 
340       if(firstpetsccall == 1) {
341         ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
342         ierr = KSPSetOperators(ksp, lhsP, lhsP);
343         ierr = KSPGetPC(ksp, &pc);
344         ierr = PCSetType(pc, PCPBJACOBI);
345         PetscInt maxits;
346         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
347         ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
348         ierr = KSPSetFromOptions(ksp);
349       }
350       ierr = KSPSolve(ksp, resP, DyP);
351       get_time((duration+2),(duration+3));
352       get_max_time_diff((duration), (duration+2),
353                         (duration+1), (duration+3),
354                         "KSPSolve \0"); // char(0))
355       get_time((duration),(duration+1));
356       if(firstpetsccall == 1) {
357       ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, NULL, &scatter7);
358       }
359       ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
360       ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
361       ierr = VecGetArray(DyPLocal, &parray);
362       PetscRow = 0;
363       for ( node = 0; node< nshg; node++) {
364         for (var = 1; var<= nflow; var++) {
365           index = node + (var-1)*nshg;
366           Dy[index] = parray[PetscRow];
367           PetscRow = PetscRow+1;
368         }
369       }
370       ierr = VecRestoreArray(DyPLocal, &parray);
371 
372       firstpetsccall = 0;
373 
374 // .... output the statistics
375 //
376       itrpar.iKs=0; // see rstat()
377       PetscInt its;
378       ierr = KSPGetIterationNumber(ksp, &its);
379       itrpar.iKs = (int) its;
380       get_time((duration+2),(duration+3));
381       get_max_time_diff((duration), (duration+2),
382                         (duration+1), (duration+3),
383                         "solWork \0"); // char(0))
384       itrpar.ntotGM += (int) its;
385       rstatp (&resNrm);
386 //
387 // .... end
388 //
389 }
390 
391 void     SolGMRpSclr(double* y,         double* ac,
392      	double* x,      double* elDw,   int* iBC,       double* BC,
393      	int* col,       int* row,
394      	int* iper,
395      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
396      	double* shglb,  double* res,   double* Dy,     long long int* fncorp)
397 {
398 //
399 // ----------------------------------------------------------------------
400 //
401 //  This is the preconditioned GMRES driver routine.
402 //
403 // input:
404 //  y      (nshg,ndof)           : Y-variables at n+alpha_v
405 //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
406 //  yold   (nshg,ndof)           : Y-variables at beginning of step
407 //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
408 //  x      (numnp,nsd)            : node coordinates
409 //  iBC    (nshg)                : BC codes
410 //  BC     (nshg,ndofBC)         : BC constraint parameters
411 //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
412 //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
413 //
414 // ----------------------------------------------------------------------
415 //
416 
417 
418 // Get variables from common_c.h
419       int nshg, nflow, nsd, iownnodes;
420       nshg  = conpar.nshg;
421       nsd = NSD;
422       iownnodes = conpar.iownnodes;
423 
424       gcorp_t nshgt;
425       gcorp_t mbeg;
426       gcorp_t mend;
427       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
428       mbeg = (gcorp_t) newdim.minowned;
429       mend = (gcorp_t) newdim.maxowned;
430 
431 
432       int node, element, var, eqn;
433       double valtoinsert;
434       int nenl, iel, lelCat, lcsyst, iorder, nshl;
435       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
436       double DyFlats[nshg];
437       double DyFlatPhastas[nshg];
438       double rmes[nshg];
439 // DEBUG
440       int i,j,k,l,m;
441 
442       double  real_rtol, real_abstol, real_dtol;
443       double *parray;
444       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
445       PetscInt petsc_n;
446       PetscOne = 1;
447       uint64_t duration[4];
448 
449 
450 //
451 //
452 // .... *******************>> Element Data Formation <<******************
453 //
454 //
455 // .... set the parameters for flux and surface tension calculations
456 //
457 //
458       genpar.idflx = 0 ;
459       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
460       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
461 
462 
463 
464       gcorp_t glbNZ;
465 
466       if(firstpetsccalls == 1) {
467         PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
468         PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
469         for(i=0;i<iownnodes;i++) {
470              idiagnz[i]=0;
471              iodiagnz[i]=0;
472         }
473         i=0;
474         for(k=0;k<nshg;k++) {
475           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
476 // this node is not owned by this rank so we skip
477           } else {
478            for(j=col[i]-1;j<col[i+1]-1;j++) {
479              glbNZ=fncorp[row[j]-1];
480              if((glbNZ < mbeg) || (glbNZ > mend)) {
481                 iodiagnz[i]++;
482              } else {
483                 idiagnz[i]++;
484              }
485            }
486            i++;
487           }
488         }
489         gcorp_t mind=1000;
490         gcorp_t mino=1000;
491         gcorp_t maxd=0;
492         gcorp_t maxo=0;
493         for(i=0;i<iownnodes;i++) {
494            mind=min(mind,idiagnz[i]);
495            mino=min(mino,iodiagnz[i]);
496            maxd=max(maxd,idiagnz[i]);
497            maxo=max(maxo,iodiagnz[i]);
498         }
499 
500         for(i=0;i<iownnodes;i++) {
501            iodiagnz[i]=1.3*maxd;
502            idiagnz[i]=1.3*maxd;
503         }
504 
505 
506 
507 //       }
508 //       if(firstpetsccalls == 1) {
509 
510         petsc_m  = (PetscInt) iownnodes;
511         petsc_M  = (PetscInt) nshgt;
512         petsc_PA  = (PetscInt) 40;
513 
514         ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M,
515                             0, idiagnz, 0, iodiagnz, &lhsPs);
516 
517         ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
518 
519 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
520 #ifdef HIDEJEDBROWN
521         ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
522 #endif
523         ierr = MatSetUp(lhsPs);
524 
525       PetscInt myMatStart, myMatEnd;
526       ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd);
527       if(workfc.myrank ==rankdump) printf("Sclr myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd);
528       }
529 //      MPI_Barrier(MPI_COMM_WORLD);
530  //     if(workfc.myrank ==0) printf("Before MatZeroEntries  \n");
531       if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs);
532 
533       get_time(duration, (duration+1));
534 
535 //      MPI_Barrier(MPI_COMM_WORLD);
536  //     if(workfc.myrank ==0) printf("Before elmgmr  \n");
537 //      for (i=0; i<nshg ; i++) {
538 //            assert(fncorp[i]>0);
539 //      }
540 
541       ElmGMRPETScSclr(y,             ac,            x, elDw,
542                   shp,           shgl,          iBC,
543                   BC,            shpb,
544                   shglb,         res,
545                   rmes,                   iper,
546                   ilwork,        &lhsPs);
547        if(firstpetsccalls == 1) {
548       // Setup IndexSet. For now, we mimic vector insertion procedure
549       // Since we always reference by global indexes this doesn't matter
550       // except for cache performance)
551       // TODO: Better arrangment?
552 
553       PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg);
554       PetscInt nodetoinsert;
555 
556         nodetoinsert = 0;
557         k=0;
558 
559 //debug
560          if(workfc.myrank == rankdump) {
561              printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
562              printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend);
563            }
564 
565         if(workfc.numpe > 1) {
566           for (i=0; i<nshg ; i++) {
567             nodetoinsert = fncorp[i]-1;
568 //debug
569          if(workfc.myrank == rankdump) {
570              printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert);
571          }
572 
573             indexsetarys[k] = nodetoinsert;
574             k = k+1;
575           }
576         }
577         else {
578           for (i=0; i<nshg ; i++) {
579             nodetoinsert = i;
580             indexsetarys[k] = nodetoinsert;
581             k = k+1;
582           }
583         }
584 
585 //  Create Vector Index Maps
586         petsc_n  = (PetscInt) nshg;
587         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys,
588      	       PETSC_COPY_VALUES, &LocalIndexSets);
589       free(indexsetarys);
590       }
591       if(genpar.lhs ==1) {
592       ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY);
593       ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY);
594       }
595       if(firstpetsccalls == 1) {
596         ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol);
597         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs);
598         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs);
599       }
600       ierr = VecZeroEntries(resPs);
601       ierr = VecZeroEntries(DyPs);
602       if(firstpetsccalls == 1) {
603         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals);
604       }
605 
606       PetscRow=0;
607       k = 0;
608       int index;
609       for (i=0; i<nshg; i++ ){
610           valtoinsert = res[i];
611           if(workfc.numpe > 1) {
612             PetscRow = (fncorp[i]-1);
613           }
614           else {
615             PetscRow = i-1;
616           }
617           assert(fncorp[i]<=nshgt);
618           assert(fncorp[i]>0);
619           assert(PetscRow>=0);
620           assert(PetscRow<=nshgt);
621           ierr =  VecSetValue(resPs, PetscRow, valtoinsert, ADD_VALUES);
622       }
623       ierr = VecAssemblyBegin(resPs);
624       ierr = VecAssemblyEnd(resPs);
625       ierr = VecNorm(resPs,NORM_2,&resNrm);
626 
627       if(firstpetsccalls == 1) {
628         ierr = KSPCreate(PETSC_COMM_WORLD, &ksps);
629         ierr = KSPSetOperators(ksps, lhsPs, lhsPs);
630         ierr = KSPGetPC(ksps, &pcs);
631         ierr = PCSetType(pcs, PCPBJACOBI);
632         PetscInt maxits;
633         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
634         ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
635         ierr = KSPSetFromOptions(ksps);
636       }
637       ierr = KSPSolve(ksps, resPs, DyPs);
638       if(firstpetsccalls == 1) {
639       ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, NULL, &scatter7s);
640       }
641       ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
642       ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
643       ierr = VecGetArray(DyPLocals, &parray);
644       PetscRow = 0;
645       for ( node = 0; node< nshg; node++) {
646           index = node;
647           Dy[index] = parray[PetscRow];
648           PetscRow = PetscRow+1;
649       }
650       ierr = VecRestoreArray(DyPLocals, &parray);
651 
652       firstpetsccalls = 0;
653 
654 // .... output the statistics
655 //
656 //      itrpar.iKss=0; // see rstat()
657       PetscInt its;
658       ierr = KSPGetIterationNumber(ksps, &its);
659       itrpar.iKss = (int) its;
660       itrpar.ntotGMs += (int) its;
661       rstatpSclr (&resNrm);
662 //
663 // .... end
664 //
665 }
666 #endif
667