xref: /phasta/phSolver/compressible/solgmrpetsc.c (revision 358c77e96363e9c142e42fdcd4b6bf559a56cc61)
1 #ifdef HAVE_PETSC
#include <stdio.h>
#include <stdlib.h>   /* malloc / free — previously implicitly declared */
#include <stdint.h>   /* uint64_t */

#include "petscsys.h"
#include "petscvec.h"
#include "petscmat.h"
#include "petscpc.h"
#include "petscksp.h"

#include "assert.h"
#include "common_c.h"

#include <FCMangle.h>
14 #define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP)
15 #define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR)
16 #define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC)
17 #define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR)
18 #define rstatp FortranCInterface_GLOBAL_(rstatp, RSTATP)
19 #define rstatpSclr FortranCInterface_GLOBAL_(rstatpsclr, RSTATPSCLR)
20 #define min(a,b) (((a) < (b)) ? (a) : (b))
21 #define max(a,b) (((a) > (b)) ? (a) : (b))
22 #define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME)
23 #define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF)
24 
// 64-bit type for PHASTA's global node numbering; matches the Fortran
// side's long long (fncorp/gcorp arrays).
typedef long long int gcorp_t;

// Timing helpers implemented elsewhere (Fortran-mangled names, see the
// FortranCInterface_GLOBAL_ defines above).
void get_time(uint64_t* rv, uint64_t* cycle);
void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl);

//#include "auxmpi.h"
//      static PetscOffset poff;

// PETSc state for the flow (blocked, nflow dofs/node) system.  Created
// on the first call to SolGMRp and reused for the rest of the run.
      static Mat lhsP;                 // assembled tangent matrix (BAIJ)
      static PC pc;                    // preconditioner context
      static KSP ksp;                  // Krylov solver context
      static Vec DyP, resP, DyPLocal;  // solution, residual, sequential copy of solution
      static PetscErrorCode ierr;      // last PETSc return code (not currently checked)
      static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol;
      static IS LocalIndexSet;         // global indices for the local-gather scatter
      static ISLocalToGlobalMapping VectorMapping;
      static  VecScatter scatter7;     // gathers distributed solution into DyPLocal
      static int firstpetsccall = 1;   // one-time-setup flag for SolGMRp

// Parallel set of PETSc state for the scalar system (SolGMRpSclr).
      static Mat lhsPs;
      static PC pcs;
      static KSP ksps;
      static Vec DyPs, resPs, DyPLocals;
      static IS LocalIndexSets;
      static ISLocalToGlobalMapping VectorMappings;
      static VecScatter scatter7s;
      static int firstpetsccalls = 1;  // one-time-setup flag for SolGMRpSclr

      static int rankdump=-1;  // rank whose debug info is printed (-1 disables); 8121 was the problem rank with 3.5.3
      PetscReal resNrm;        // 2-norm of assembled residual, reported via rstatp/rstatpSclr
55 
56 
57 void     SolGMRp(double* y,         double* ac,        double* yold,
58      	double* x,         int* iBC,       double* BC,
59      	int* col,       int* row,       double* lhsk,
60      	double* res,       double* BDiag,
61      	int* iper,
62      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
63      	double* shglb,     double* Dy,        double* rerr, long long int* fncorp)
64 {
65 //
66 // ----------------------------------------------------------------------
67 //
68 //  This is the preconditioned GMRES driver routine.
69 //
70 // input:
71 //  y      (nshg,ndof)           : Y-variables at n+alpha_v
72 //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
73 //  yold   (nshg,ndof)           : Y-variables at beginning of step
74 //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
75 //  x      (numnp,nsd)            : node coordinates
76 //  iBC    (nshg)                : BC codes
77 //  BC     (nshg,ndofBC)         : BC constraint parameters
78 //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
79 //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
80 //
81 // output:
82 //  res    (nshg,nflow)           : preconditioned residual
83 //  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
84 //
85 //
86 // Zdenek Johan,  Winter 1991.  (Fortran 90)
87 // ----------------------------------------------------------------------
88 //
89 
90 
91 // Get variables from common_c.h
92       int nshg, nflow, nsd, iownnodes;
93       nshg  = conpar.nshg;
94       nflow = conpar.nflow;
95       nsd = NSD;
96       iownnodes = conpar.iownnodes;
97 
98       gcorp_t nshgt;
99       gcorp_t mbeg;
100       gcorp_t mend;
101       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
102       mbeg = (gcorp_t) newdim.minowned;
103       mend = (gcorp_t) newdim.maxowned;
104 
105 
106       int node, element, var, eqn;
107       double valtoinsert;
108       int nenl, iel, lelCat, lcsyst, iorder, nshl;
109       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
110       double DyFlat[nshg*nflow];
111       double DyFlatPhasta[nshg*nflow];
112       double rmes[nshg*nflow];
113 // DEBUG
114       int i,j,k,l,m;
115 
116       // FIXME: PetscScalar
117       double  real_rtol, real_abstol, real_dtol;
118 // /DEBUG
119       //double parray[1]; // Should be a PetscScalar
120       double *parray; // Should be a PetscScalar
121       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
122       PetscInt petsc_n;
123       PetscOne = 1;
124       uint64_t duration[4];
125 
126       gcorp_t glbNZ;
127 
128       if(firstpetsccall == 1) {
129 //Everthing in this conditional block should be moved to a function to estimate the size of PETSc's matrix which improves time on the first matrix fill
130 //
131         PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
132         PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
133         for(i=0;i<iownnodes;i++) {
134              idiagnz[i]=0;
135              iodiagnz[i]=0;
136         }
137         i=0;
138         for(k=0;k<nshg;k++) {
139           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
140 // this node is not owned by this rank so we skip
141           } else {
142            for(j=col[i]-1;j<col[i+1]-1;j++) {
143 //          assert(row[j]<=nshg);
144 //          assert(fncorp[row[j]-1]<=nshgt);
145              glbNZ=fncorp[row[j]-1];
146              if((glbNZ < mbeg) || (glbNZ > mend)) {
147                 iodiagnz[i]++;
148              } else {
149                 idiagnz[i]++;
150              }
151            }
152            i++;
153           }
154         }
155         gcorp_t mind=1000;
156         gcorp_t mino=1000;
157         gcorp_t maxd=0;
158         gcorp_t maxo=0;
159         for(i=0;i<iownnodes;i++) {
160            mind=min(mind,idiagnz[i]);
161            mino=min(mino,iodiagnz[i]);
162            maxd=max(maxd,idiagnz[i]);
163            maxo=max(maxo,iodiagnz[i]);
164 //           iodiagnz[i]=max(iodiagnz[i],10);
165 //           idiagnz[i]=max(idiagnz[i],10);
166 //           iodiagnz[i]=2*iodiagnz[i];   //  estimate a bit higher for off-part interactions
167 //           idiagnz[i]=2*idiagnz[i];   //  estimate a bit higher for off-part interactions
168         }
169 // the above was pretty good but below is faster and not too much more memory...of course once you do this
170 // could just use the constant fill parameters in create but keep it alive for potential later optimization
171 
172         for(i=0;i<iownnodes;i++) {
173            iodiagnz[i]=1.3*maxd;
174            idiagnz[i]=1.3*maxd;
175         }
176 
177 
178 
179         if(workfc.numpe < 200){
180           printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
181           printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo);
182         }
183         // Print debug info
184         if(nshgt < 200){
185           int irank;
186           for(irank=0;irank<workfc.numpe;irank++) {
187             if(irank == workfc.myrank){
188               printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank);
189               for(i=0;i<iownnodes;i++) {
190                 printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]);
191               }
192             }
193            MPI_Barrier(MPI_COMM_WORLD);
194           }
195         }
196         petsc_bs = (PetscInt) nflow;
197         petsc_m  = (PetscInt) nflow* (PetscInt) iownnodes;
198         petsc_M  = (PetscInt) nshgt * (PetscInt) nflow;
199         petsc_PA  = (PetscInt) 40;
200         ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M,
201                              0, idiagnz, 0, iodiagnz, &lhsP);
202 
203         ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
204 
205 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
206 #ifdef JEDBROWN
207         ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
208 #endif
209         ierr = MatSetUp(lhsP);
210 
211       PetscInt myMatStart, myMatEnd;
212       ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
213 //debug
214       if(workfc.myrank == rankdump) printf("Flow myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd);
215       }
216       if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP);
217 
218 //
219 // .... *******************>> Element Data Formation <<******************
220 //
221 //
222 // .... set the parameters for flux and surface tension calculations
223 //
224 //
225       genpar.idflx = 0 ;
226       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
227       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
228 
229       get_time(duration, (duration+1));
230 
231       ElmGMRPETSc(y,             ac,            x,
232                   shp,           shgl,          iBC,
233                   BC,            shpb,
234                   shglb,         res,
235                   rmes,                   iper,
236                   ilwork,        rerr ,         &lhsP);
237 
238       get_time((duration+2), (duration+3));
239       get_max_time_diff((duration), (duration+2),
240                         (duration+1), (duration+3),
241                         "ElmGMRPETSc \0");  // char(0))
242        if(firstpetsccall == 1) {
243       // Setup IndexSet. For now, we mimic vector insertion procedure
244       // Since we always reference by global indexes this doesn't matter
245       // except for cache performance)
246       // TODO: Better arrangment?
247       PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg);
248       PetscInt nodetoinsert;
249         nodetoinsert = 0;
250         k=0;
251 //debug
252          if(workfc.myrank == rankdump) {
253              printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
254              printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend);
255            }
256         if(workfc.numpe > 1) {
257           for (i=0; i<nshg ; i++) {
258             nodetoinsert = fncorp[i]-1;
259 //debug
260          if(workfc.myrank == rankdump) {
261              printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert);
262          }
263 
264 //            assert(fncorp[i]>=0);
265             for (j=1; j<=nflow; j++) {
266               indexsetary[k] = nodetoinsert*nflow+(j-1);
267               assert(indexsetary[k]>=0);
268 //              assert(fncorp[i]>=0);
269               k = k+1;
270             }
271           }
272         }
273         else {
274           for (i=0; i<nshg ; i++) {
275             nodetoinsert = i;
276             for (j=1; j<=nflow; j++) {
277               indexsetary[k] = nodetoinsert*nflow+(j-1);
278               k = k+1;
279             }
280           }
281         }
282 
283 //  Create Vector Index Maps
284         petsc_n  = (PetscInt) nshg * (PetscInt) nflow;
285         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary,
286      	       PETSC_COPY_VALUES, &LocalIndexSet);
287       free(indexsetary);
288       }
289       if(genpar.lhs == 1) {
290       get_time((duration), (duration+1));
291       ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY);
292       ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY);
293       get_time((duration+2),(duration+3));
294       get_max_time_diff((duration), (duration+2),
295                         (duration+1), (duration+3),
296                         "MatAssembly \0"); // char(0))
297       get_time(duration, (duration+1));
298       }
299       if(firstpetsccall == 1) {
300         ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol);
301         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP);
302         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP);
303       }
304       ierr = VecZeroEntries(resP);
305       if(firstpetsccall == 1) {
306         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal);
307       }
308 
309       PetscRow=0;
310       k = 0;
311       int index;
312       for (i=0; i<nshg; i++ ){
313         for (j = 1; j<=nflow; j++){
314           index = i + (j-1)*nshg;
315           valtoinsert = res[index];
316           if(workfc.numpe > 1) {
317             PetscRow = (fncorp[i]-1)*nflow+(j-1);
318           }
319           else {
320             PetscRow = i*nflow+(j-1);
321           }
322           assert(fncorp[i]<=nshgt);
323           assert(fncorp[i]>0);
324           assert(PetscRow>=0);
325           assert(PetscRow<=nshgt*nflow);
326           ierr =  VecSetValue(resP, PetscRow, valtoinsert, ADD_VALUES);
327         }
328       }
329       ierr = VecAssemblyBegin(resP);
330       ierr = VecAssemblyEnd(resP);
331       ierr = VecNorm(resP,NORM_2,&resNrm);
332       get_time((duration+2), (duration+3));
333       get_max_time_diff((duration), (duration+2),
334                         (duration+1), (duration+3),
335                         "VectorWorkPre \0"); // char(0))
336 
337       get_time((duration),(duration+1));
338 
339       if(firstpetsccall == 1) {
340         ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
341         ierr = KSPSetOperators(ksp, lhsP, lhsP);
342         ierr = KSPGetPC(ksp, &pc);
343         ierr = PCSetType(pc, PCPBJACOBI);
344         PetscInt maxits;
345         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
346         ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
347         ierr = KSPSetFromOptions(ksp);
348       }
349       ierr = KSPSolve(ksp, resP, DyP);
350       get_time((duration+2),(duration+3));
351       get_max_time_diff((duration), (duration+2),
352                         (duration+1), (duration+3),
353                         "KSPSolve \0"); // char(0))
354       get_time((duration),(duration+1));
355       if(firstpetsccall == 1) {
356       ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, NULL, &scatter7);
357       }
358       ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
359       ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
360       ierr = VecGetArray(DyPLocal, &parray);
361       PetscRow = 0;
362       for ( node = 0; node< nshg; node++) {
363         for (var = 1; var<= nflow; var++) {
364           index = node + (var-1)*nshg;
365           Dy[index] = parray[PetscRow];
366           PetscRow = PetscRow+1;
367         }
368       }
369       ierr = VecRestoreArray(DyPLocal, &parray);
370 
371       firstpetsccall = 0;
372 
373 // .... output the statistics
374 //
375       itrpar.iKs=0; // see rstat()
376       PetscInt its;
377       ierr = KSPGetIterationNumber(ksp, &its);
378       itrpar.iKs = (int) its;
379       get_time((duration+2),(duration+3));
380       get_max_time_diff((duration), (duration+2),
381                         (duration+1), (duration+3),
382                         "solWork \0"); // char(0))
383       itrpar.ntotGM += (int) its;
384       rstatp (&resNrm);
385 //
386 // .... end
387 //
388 }
389 
390 void     SolGMRpSclr(double* y,         double* ac,
391      	double* x,      double* elDw,   int* iBC,       double* BC,
392      	int* col,       int* row,
393      	int* iper,
394      	int* ilwork,    double* shp,       double* shgl,      double* shpb,
395      	double* shglb,  double* res,   double* Dy,     long long int* fncorp)
396 {
397 //
398 // ----------------------------------------------------------------------
399 //
400 //  This is the preconditioned GMRES driver routine.
401 //
402 // input:
403 //  y      (nshg,ndof)           : Y-variables at n+alpha_v
404 //  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
405 //  yold   (nshg,ndof)           : Y-variables at beginning of step
406 //  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
407 //  x      (numnp,nsd)            : node coordinates
408 //  iBC    (nshg)                : BC codes
409 //  BC     (nshg,ndofBC)         : BC constraint parameters
410 //  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
411 //  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
412 //
413 // ----------------------------------------------------------------------
414 //
415 
416 
417 // Get variables from common_c.h
418       int nshg, nflow, nsd, iownnodes;
419       nshg  = conpar.nshg;
420       nsd = NSD;
421       iownnodes = conpar.iownnodes;
422 
423       gcorp_t nshgt;
424       gcorp_t mbeg;
425       gcorp_t mend;
426       nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
427       mbeg = (gcorp_t) newdim.minowned;
428       mend = (gcorp_t) newdim.maxowned;
429 
430 
431       int node, element, var, eqn;
432       double valtoinsert;
433       int nenl, iel, lelCat, lcsyst, iorder, nshl;
434       int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
435       double DyFlats[nshg];
436       double DyFlatPhastas[nshg];
437       double rmes[nshg];
438 // DEBUG
439       int i,j,k,l,m;
440 
441       double  real_rtol, real_abstol, real_dtol;
442       double *parray;
443       PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
444       PetscInt petsc_n;
445       PetscOne = 1;
446       uint64_t duration[4];
447 
448 
449 //
450 //
451 // .... *******************>> Element Data Formation <<******************
452 //
453 //
454 // .... set the parameters for flux and surface tension calculations
455 //
456 //
457       genpar.idflx = 0 ;
458       if(genpar.idiff >= 1)  genpar.idflx= genpar.idflx + (nflow-1) * nsd;
459       if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
460 
461 
462 
463       gcorp_t glbNZ;
464 
465       if(firstpetsccalls == 1) {
466         PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
467         PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
468         for(i=0;i<iownnodes;i++) {
469              idiagnz[i]=0;
470              iodiagnz[i]=0;
471         }
472         i=0;
473         for(k=0;k<nshg;k++) {
474           if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
475 // this node is not owned by this rank so we skip
476           } else {
477            for(j=col[i]-1;j<col[i+1]-1;j++) {
478              glbNZ=fncorp[row[j]-1];
479              if((glbNZ < mbeg) || (glbNZ > mend)) {
480                 iodiagnz[i]++;
481              } else {
482                 idiagnz[i]++;
483              }
484            }
485            i++;
486           }
487         }
488         gcorp_t mind=1000;
489         gcorp_t mino=1000;
490         gcorp_t maxd=0;
491         gcorp_t maxo=0;
492         for(i=0;i<iownnodes;i++) {
493            mind=min(mind,idiagnz[i]);
494            mino=min(mino,iodiagnz[i]);
495            maxd=max(maxd,idiagnz[i]);
496            maxo=max(maxo,iodiagnz[i]);
497         }
498 
499         for(i=0;i<iownnodes;i++) {
500            iodiagnz[i]=1.3*maxd;
501            idiagnz[i]=1.3*maxd;
502         }
503 
504 
505 
506 //       }
507 //       if(firstpetsccalls == 1) {
508 
509         petsc_m  = (PetscInt) iownnodes;
510         petsc_M  = (PetscInt) nshgt;
511         petsc_PA  = (PetscInt) 40;
512 
513         ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M,
514                             0, idiagnz, 0, iodiagnz, &lhsPs);
515 
516         ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
517 
518 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
519 #ifdef HIDEJEDBROWN
520         ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
521 #endif
522         ierr = MatSetUp(lhsPs);
523 
524       PetscInt myMatStart, myMatEnd;
525       ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd);
526       if(workfc.myrank ==rankdump) printf("Sclr myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd);
527       }
528 //      MPI_Barrier(MPI_COMM_WORLD);
529  //     if(workfc.myrank ==0) printf("Before MatZeroEntries  \n");
530       if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs);
531 
532       get_time(duration, (duration+1));
533 
534 //      MPI_Barrier(MPI_COMM_WORLD);
535  //     if(workfc.myrank ==0) printf("Before elmgmr  \n");
536 //      for (i=0; i<nshg ; i++) {
537 //            assert(fncorp[i]>0);
538 //      }
539 
540       ElmGMRPETScSclr(y,             ac,            x, elDw,
541                   shp,           shgl,          iBC,
542                   BC,            shpb,
543                   shglb,         res,
544                   rmes,                   iper,
545                   ilwork,        &lhsPs);
546        if(firstpetsccalls == 1) {
547       // Setup IndexSet. For now, we mimic vector insertion procedure
548       // Since we always reference by global indexes this doesn't matter
549       // except for cache performance)
550       // TODO: Better arrangment?
551 
552       PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg);
553       PetscInt nodetoinsert;
554 
555         nodetoinsert = 0;
556         k=0;
557 
558 //debug
559          if(workfc.myrank == rankdump) {
560              printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
561              printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend);
562            }
563 
564         if(workfc.numpe > 1) {
565           for (i=0; i<nshg ; i++) {
566             nodetoinsert = fncorp[i]-1;
567 //debug
568          if(workfc.myrank == rankdump) {
569              printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert);
570          }
571 
572             indexsetarys[k] = nodetoinsert;
573             k = k+1;
574           }
575         }
576         else {
577           for (i=0; i<nshg ; i++) {
578             nodetoinsert = i;
579             indexsetarys[k] = nodetoinsert;
580             k = k+1;
581           }
582         }
583 
584 //  Create Vector Index Maps
585         petsc_n  = (PetscInt) nshg;
586         ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys,
587      	       PETSC_COPY_VALUES, &LocalIndexSets);
588       free(indexsetarys);
589       }
590       if(genpar.lhs ==1) {
591       ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY);
592       ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY);
593       }
594       if(firstpetsccalls == 1) {
595         ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol);
596         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs);
597         ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs);
598       }
599       ierr = VecZeroEntries(resPs);
600       if(firstpetsccalls == 1) {
601         ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals);
602       }
603 
604       PetscRow=0;
605       k = 0;
606       int index;
607       for (i=0; i<nshg; i++ ){
608           valtoinsert = res[i];
609           if(workfc.numpe > 1) {
610             PetscRow = (fncorp[i]-1);
611           }
612           else {
613             PetscRow = i-1;
614           }
615           assert(fncorp[i]<=nshgt);
616           assert(fncorp[i]>0);
617           assert(PetscRow>=0);
618           assert(PetscRow<=nshgt);
619           ierr =  VecSetValue(resPs, PetscRow, valtoinsert, ADD_VALUES);
620       }
621       ierr = VecAssemblyBegin(resPs);
622       ierr = VecAssemblyEnd(resPs);
623       ierr = VecNorm(resPs,NORM_2,&resNrm);
624 
625       if(firstpetsccalls == 1) {
626         ierr = KSPCreate(PETSC_COMM_WORLD, &ksps);
627         ierr = KSPSetOperators(ksps, lhsPs, lhsPs);
628         ierr = KSPGetPC(ksps, &pcs);
629         ierr = PCSetType(pcs, PCPBJACOBI);
630         PetscInt maxits;
631         maxits = (PetscInt)  solpar.nGMRES * (PetscInt) solpar.Kspace;
632         ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
633         ierr = KSPSetFromOptions(ksps);
634       }
635       ierr = KSPSolve(ksps, resPs, DyPs);
636       if(firstpetsccalls == 1) {
637       ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, NULL, &scatter7s);
638       }
639       ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
640       ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
641       ierr = VecGetArray(DyPLocals, &parray);
642       PetscRow = 0;
643       for ( node = 0; node< nshg; node++) {
644           index = node;
645           Dy[index] = parray[PetscRow];
646           PetscRow = PetscRow+1;
647       }
648       ierr = VecRestoreArray(DyPLocals, &parray);
649 
650       firstpetsccalls = 0;
651 
652 // .... output the statistics
653 //
654 //      itrpar.iKss=0; // see rstat()
655       PetscInt its;
656       ierr = KSPGetIterationNumber(ksps, &its);
657       itrpar.iKss = (int) its;
658       itrpar.ntotGMs += (int) its;
659       rstatpSclr (&resNrm);
660 //
661 // .... end
662 //
663 }
664 #endif
665