xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision a82fdc85d2a67a16ef21d18fe5553b3e7e3217c7)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines a "solve the problem redundantly on each subgroup of processes" preconditioner.
5 */
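
/*
   Overview of the data flow implemented below (a descriptive sketch of
   PCSetUp_Redundant() and PCApply_Redundant()):

     x on pc->comm  --scatterin-->  xdup on dupcomm  --VecPlaceArray-->  xsub on subcomm
                                                                              |
                                                                       PCApply(red->pc)
                                                                              v
     y on pc->comm  <--scatterout-- ydup on dupcomm  <--VecPlaceArray-- ysub on subcomm
*/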
6 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
7 #include "petscksp.h"
8 
9 typedef struct {
10   PC         pc;                   /* actual preconditioner used on each processor */
11   Vec        xsub,ysub;            /* vectors of a subcommunicator that hold the values of the parallel vectors of pc->comm */
12   Vec        xdup,ydup;            /* parallel vectors that congregate xsub or ysub, facilitating vector scattering */
13   Mat        pmats;                /* matrix and optional preconditioner matrix belonging to a subcommunicator */
14   VecScatter scatterin,scatterout; /* scatters used to move all values to each processor group (subcommunicator) and back */
15   PetscTruth useparallelmat;
16   MPI_Comm   subcomm;              /* processes belonging to a subcommunicator implement a PC in parallel */
17   MPI_Comm   dupcomm;              /* processes of pc->comm with ranks remapped so that the vectors xdup/ydup
18                                       are contiguous when xsub/ysub are appended along their colors */
19   PetscInt   nsubcomm;             /* number of subcommunicators, which equals the number of redundant matrix systems */
20   PetscInt   color;                /* color identifying this process's subcommunicator */
21 } PC_Redundant;
22 
23 #include "src/sys/viewer/impls/ascii/asciiimpl.h"  /*I     "petsc.h"   I*/
24 #include "petscfix.h"
25 #include <stdarg.h>
26 
27 PetscErrorCode PETSC_DLLEXPORT PetscViewerRestoreSubcomm(PetscViewer viewer,PetscViewer *outviewer)
28 {
29   PetscErrorCode ierr;
30   PetscMPIInt    size;
31 
32   PetscFunctionBegin;
33   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_COOKIE,1);
34 
35   ierr = MPI_Comm_size(viewer->comm,&size);CHKERRQ(ierr);
36   if (size == 1) {
37     ierr = PetscObjectDereference((PetscObject)viewer);CHKERRQ(ierr);
38     if (outviewer) *outviewer = 0;
39   } else if (viewer->ops->restoresingleton) {
40     ierr = (*viewer->ops->restoresingleton)(viewer,outviewer);CHKERRQ(ierr);
41   }
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PetscViewerDestroy_ASCIIh"
47 PetscErrorCode PetscViewerDestroy_ASCIIh(PetscViewer viewer)
48 {
49   PetscMPIInt       rank;
50   PetscErrorCode    ierr;
51   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
52   PetscViewerLink   *vlink;
53   PetscTruth        flg;
54 
55   PetscFunctionBegin;
56   if (vascii->sviewer) {
57     SETERRQ(PETSC_ERR_ORDER,"ASCII PetscViewer destroyed before restoring singleton PetscViewer");
58   }
59   ierr = MPI_Comm_rank(viewer->comm,&rank);CHKERRQ(ierr);
60   if (!rank && vascii->fd != stderr && vascii->fd != stdout) {
61     if (vascii->fd) fclose(vascii->fd);
62     if (vascii->storecompressed) {
63       char par[PETSC_MAX_PATH_LEN],buf[PETSC_MAX_PATH_LEN];
64       FILE *fp;
65       ierr = PetscStrcpy(par,"gzip ");CHKERRQ(ierr);
66       ierr = PetscStrcat(par,vascii->filename);CHKERRQ(ierr);
67 #if defined(PETSC_HAVE_POPEN)
68       ierr = PetscPOpen(PETSC_COMM_SELF,PETSC_NULL,par,"r",&fp);CHKERRQ(ierr);
69       if (fgets(buf,1024,fp)) {
70         SETERRQ2(PETSC_ERR_LIB,"Error from compression command %s\n%s",par,buf);
71       }
72       ierr = PetscPClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr);
73 #else
74       SETERRQ(PETSC_ERR_SUP_SYS,"Cannot run external programs on this machine");
75 #endif
76     }
77   }
78   ierr = PetscStrfree(vascii->filename);CHKERRQ(ierr);
79   ierr = PetscFree(vascii);CHKERRQ(ierr);
80 
81   /* remove the viewer from the list in the MPI Communicator */
82   if (Petsc_Viewer_keyval == MPI_KEYVAL_INVALID) {
83     ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelViewer,&Petsc_Viewer_keyval,(void*)0);CHKERRQ(ierr);
84   }
85 
86   ierr = MPI_Attr_get(viewer->comm,Petsc_Viewer_keyval,(void**)&vlink,(PetscMPIInt*)&flg);CHKERRQ(ierr);
87   if (flg) {
88     if (vlink && vlink->viewer == viewer) {
89       ierr = MPI_Attr_put(viewer->comm,Petsc_Viewer_keyval,vlink->next);CHKERRQ(ierr);
90       ierr = PetscFree(vlink);CHKERRQ(ierr);
91     } else {
92       while (vlink && vlink->next) {
93         if (vlink->next->viewer == viewer) {
94           PetscViewerLink *nv = vlink->next;
95           vlink->next = vlink->next->next;
96           ierr = PetscFree(nv);CHKERRQ(ierr);
97         }
98         vlink = vlink->next;
99       }
100     }
101   }
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "PetscViewerDestroy_ASCII_Subcomm"
107 PetscErrorCode PetscViewerDestroy_ASCII_Subcomm(PetscViewer viewer)
108 {
109   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
110   PetscErrorCode    ierr;
111   PetscFunctionBegin;
112   ierr = PetscViewerRestoreSubcomm(vascii->bviewer,&viewer);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PetscViewerFlush_ASCII_Subcomm_0"
118 PetscErrorCode PetscViewerFlush_ASCII_Subcomm_0(PetscViewer viewer)
119 {
120   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
121 
122   PetscFunctionBegin;
123   fflush(vascii->fd);
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "PetscViewerGetSubcomm_ASCII"
129 PetscErrorCode PetscViewerGetSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer)
130 {
131   PetscMPIInt       rank;
132   PetscErrorCode    ierr;
133   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data,*ovascii;
134   const char        *name;
135 
136   PetscFunctionBegin;
137   if (vascii->sviewer) {
138     SETERRQ(PETSC_ERR_ORDER,"Subcomm already obtained from PetscViewer and not restored");
139   }
140   /* ierr         = PetscViewerCreate(PETSC_COMM_SELF,outviewer);CHKERRQ(ierr); */
141   ierr         = PetscViewerCreate(subcomm,outviewer);CHKERRQ(ierr);
142   ierr         = PetscViewerSetType(*outviewer,PETSC_VIEWER_ASCII);CHKERRQ(ierr);
143   ovascii      = (PetscViewer_ASCII*)(*outviewer)->data;
144   ovascii->fd  = vascii->fd;
145   ovascii->tab = vascii->tab;
146 
147   vascii->sviewer = *outviewer;
148 
149   (*outviewer)->format     = viewer->format;
150   (*outviewer)->iformat    = viewer->iformat;
151 
152   ierr = PetscObjectGetName((PetscObject)viewer,&name);CHKERRQ(ierr);
153   ierr = PetscObjectSetName((PetscObject)(*outviewer),name);CHKERRQ(ierr);
154 
155   ierr = MPI_Comm_rank(viewer->comm,&rank);CHKERRQ(ierr);
156   ((PetscViewer_ASCII*)((*outviewer)->data))->bviewer = viewer;
157   (*outviewer)->ops->destroy = PetscViewerDestroy_ASCII_Subcomm;
158   if (rank) {
159     (*outviewer)->ops->flush = 0;
160   } else {
161     (*outviewer)->ops->flush = PetscViewerFlush_ASCII_Subcomm_0;
162   }
163   PetscFunctionReturn(0);
164 }
165 
166 #undef __FUNCT__
167 #define __FUNCT__ "PetscViewerRestoreSubcomm_ASCII"
168 PetscErrorCode PetscViewerRestoreSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer)
169 {
170   PetscErrorCode    ierr;
171   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)(*outviewer)->data;
172   PetscViewer_ASCII *ascii  = (PetscViewer_ASCII *)viewer->data;
173 
174   PetscFunctionBegin;
175   if (!ascii->sviewer) {
176     SETERRQ(PETSC_ERR_ORDER,"Subcomm never obtained from PetscViewer");
177   }
178   if (ascii->sviewer != *outviewer) {
179     SETERRQ(PETSC_ERR_ARG_WRONG,"This PetscViewer did not generate that subcommunicator viewer");
180   }
181 
182   ascii->sviewer             = 0;
183   vascii->fd                 = stdout;
184   (*outviewer)->ops->destroy = PetscViewerDestroy_ASCIIh;
185   ierr                       = PetscViewerDestroy(*outviewer);CHKERRQ(ierr);
186   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "PCView_Redundant"
192 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
193 {
194   PC_Redundant   *red = (PC_Redundant*)pc->data;
195   PetscErrorCode ierr;
196   PetscMPIInt    rank;
197   PetscTruth     iascii,isstring;
198   PetscViewer    sviewer,subviewer;
199   PetscInt       color = red->color;
200 
201   PetscFunctionBegin;
202   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
203   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
204   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
205   if (iascii) {
206     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
207     ierr = PetscViewerGetSubcomm_ASCII(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr);
208     /* ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); */
209     if (!color) {
210       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
211       ierr = PCView(red->pc,subviewer);CHKERRQ(ierr);
212       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
213     }
214     /* ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); */
215     ierr = PetscViewerRestoreSubcomm_ASCII(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr);
216   } else if (isstring) {
217     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
218     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
219     if (!rank) {
220       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
221     }
222     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
223   } else {
224     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
225   }
226   PetscFunctionReturn(0);
227 }
228 
229 #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
230 #include "private/vecimpl.h"
231 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
232 #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
233 
234 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
235   PetscInt       nzlocal,nsends,nrecvs;
236   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
237   PetscScalar    *sbuf_a,**rbuf_a;
238   PetscErrorCode (*MatDestroy)(Mat);
239 } Mat_Redundant;
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant"
243 PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr)
244 {
245   PetscErrorCode       ierr;
246   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
247   PetscInt             i;
248 
249   PetscFunctionBegin;
250   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
251   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
252   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
253   for (i=0; i<redund->nrecvs; i++){
254     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
255     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
256   }
257   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
258   ierr = PetscFree(redund);CHKERRQ(ierr);
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatDestroy_MatRedundant"
264 PetscErrorCode MatDestroy_MatRedundant(Mat A)
265 {
266   PetscErrorCode       ierr;
267   PetscObjectContainer container;
268   Mat_Redundant        *redund=PETSC_NULL;
269 
270   PetscFunctionBegin;
271   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
272   if (container) {
273     ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
274   } else {
275     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
276   }
277   A->ops->destroy = redund->MatDestroy;
278   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
279   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
280   ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 #undef __FUNCT__
285 #define __FUNCT__ "MatGetRedundantMatrix_AIJ"
286 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
287 {
288   PetscMPIInt    rank,size;
289   MPI_Comm       comm=mat->comm;
290   PetscErrorCode ierr;
291   PetscInt       nsends,nrecvs,i,prid=100,rownz_max;
292   PetscMPIInt    *send_rank,*recv_rank;
293   PetscInt       *rowrange=mat->rmap.range;
294   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
295   Mat            A=aij->A,B=aij->B,C=*matredundant;
296   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
297   PetscScalar    *sbuf_a;
298   PetscInt       nzlocal=a->nz+b->nz;
299   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
300   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
301   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
302   PetscScalar    *vals,*aworkA,*aworkB;
303   PetscMPIInt    tag1,tag2,tag3,imdex;
304   MPI_Request    *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3;
305   MPI_Status     recv_status,*send_status;
306   PetscInt       *sbuf_nz,*rbuf_nz,count;
307   PetscInt       **rbuf_j;
308   PetscScalar    **rbuf_a;
309   Mat_Redundant  *redund=PETSC_NULL;
310   PetscObjectContainer container;
311 
312   PetscFunctionBegin;
313   ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr);
314   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
315   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
316 
317   if (reuse == MAT_REUSE_MATRIX) {
318     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
319     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
320     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
321     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
322     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
323     if (container) {
324       ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
325     } else {
326       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
327     }
328     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
329 
330     nsends    = redund->nsends;
331     nrecvs    = redund->nrecvs;
332     send_rank = redund->send_rank; recv_rank = send_rank + size;
333     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
334     sbuf_j    = redund->sbuf_j;
335     sbuf_a    = redund->sbuf_a;
336     rbuf_j    = redund->rbuf_j;
337     rbuf_a    = redund->rbuf_a;
338   }
339 
340   if (reuse == MAT_INITIAL_MATRIX){
341     PetscMPIInt  subrank,subsize;
342     PetscInt     nleftover,np_subcomm;
343     /* get the destination processors' id send_rank, nsends and nrecvs */
344     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
345     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
346     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
347     recv_rank = send_rank + size;
348     np_subcomm = size/nsubcomm;
349     nleftover  = size - nsubcomm*np_subcomm;
350     nsends = 0; nrecvs = 0;
351     for (i=0; i<size; i++){ /* i=rank*/
352       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
353         send_rank[nsends] = i; nsends++;
354         recv_rank[nrecvs++] = i;
355       }
356     }
357     if (rank >= size - nleftover){/* this proc is a leftover processor */
358       i = size-nleftover-1;
359       j = 0;
360       while (j < nsubcomm - nleftover){
361         send_rank[nsends++] = i;
362         i--; j++;
363       }
364     }
365 
366     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
367       for (i=0; i<nleftover; i++){
368         recv_rank[nrecvs++] = size-nleftover+i;
369       }
370     }
371 
372     /* allocate sbuf_j, sbuf_a */
373     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
374     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
375     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
376   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
377 
378   /* copy mat's local entries into the buffers */
379   if (reuse == MAT_INITIAL_MATRIX){
380     rownz_max = 0;
381     rptr = sbuf_j;
382     cols = sbuf_j + rend-rstart + 1;
383     vals = sbuf_a;
384     rptr[0] = 0;
385     for (i=0; i<rend-rstart; i++){
386       row = i + rstart;
387       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
388       ncols  = nzA + nzB;
389       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
390       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
391       /* load the column indices for this row into cols */
392       lwrite = 0;
393       for (l=0; l<nzB; l++) {
394         if ((ctmp = bmap[cworkB[l]]) < cstart){
395           vals[lwrite]   = aworkB[l];
396           cols[lwrite++] = ctmp;
397         }
398       }
399       for (l=0; l<nzA; l++){
400         vals[lwrite]   = aworkA[l];
401         cols[lwrite++] = cstart + cworkA[l];
402       }
403       for (l=0; l<nzB; l++) {
404         if ((ctmp = bmap[cworkB[l]]) >= cend){
405           vals[lwrite]   = aworkB[l];
406           cols[lwrite++] = ctmp;
407         }
408       }
409       vals += ncols;
410       cols += ncols;
411       rptr[i+1] = rptr[i] + ncols;
412       if (rownz_max < ncols) rownz_max = ncols;
413     }
414     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
415   } else { /* only copy matrix values into sbuf_a */
416     rptr = sbuf_j;
417     vals = sbuf_a;
418     rptr[0] = 0;
419     for (i=0; i<rend-rstart; i++){
420       row = i + rstart;
421       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
422       ncols  = nzA + nzB;
423       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
424       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
425       lwrite = 0;
426       for (l=0; l<nzB; l++) {
427         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
428       }
429       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
430       for (l=0; l<nzB; l++) {
431         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
432       }
433       vals += ncols;
434       rptr[i+1] = rptr[i] + ncols;
435     }
436   } /* end of if (reuse == MAT_INITIAL_MATRIX) ... else ... */
437 
438   /* send nzlocal to others, and recv others' nzlocal */
439   /*--------------------------------------------------*/
440   if (reuse == MAT_INITIAL_MATRIX){
441     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
442     s_waits2 = s_waits3 + nsends;
443     s_waits1 = s_waits2 + nsends;
444     r_waits1 = s_waits1 + nsends;
445     r_waits2 = r_waits1 + nrecvs;
446     r_waits3 = r_waits2 + nrecvs;
447   } else {
448     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
449     r_waits3 = s_waits3 + nsends;
450   }
451 
452   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
453   if (reuse == MAT_INITIAL_MATRIX){
454     /* get new tags to keep the communication clean */
455     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
456     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
457     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
458     rbuf_nz = sbuf_nz + nsends;
459 
460     /* post receives of other's nzlocal */
461     for (i=0; i<nrecvs; i++){
462       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
463     }
464     /* send nzlocal to others */
465     for (i=0; i<nsends; i++){
466       sbuf_nz[i] = nzlocal;
467       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
468     }
469     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
470     count = nrecvs;
471     while (count) {
472       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
473       recv_rank[imdex] = recv_status.MPI_SOURCE;
474       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
475       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
476 
477       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
478       rbuf_nz[imdex] += i + 2;
479       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
480       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
481       count--;
482     }
483     /* wait on sends of nzlocal */
484     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
485     /* send mat->i,j to others, and recv from others */
486     /*------------------------------------------------*/
487     for (i=0; i<nsends; i++){
488       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
489       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
490     }
491     /* wait on receives of mat->i,j */
492     /*------------------------------*/
493     count = nrecvs;
494     while (count) {
495       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
496       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
497       count--;
498     }
499     /* wait on sends of mat->i,j */
500     /*---------------------------*/
501     if (nsends) {
502       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
503     }
504   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
505 
506   /* post receives, send and receive mat->a */
507   /*----------------------------------------*/
508   for (imdex=0; imdex<nrecvs; imdex++) {
509     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
510   }
511   for (i=0; i<nsends; i++){
512     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
513   }
514   count = nrecvs;
515   while (count) {
516     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
517     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
518     count--;
519   }
520   if (nsends) {
521     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
522   }
523 
524   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
525 
526   /* create redundant matrix */
527   /*-------------------------*/
528   if (reuse == MAT_INITIAL_MATRIX){
529     /* compute rownz_max for preallocation */
530     for (imdex=0; imdex<nrecvs; imdex++){
531       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
532       rptr = rbuf_j[imdex];
533       for (i=0; i<j; i++){
534         ncols = rptr[i+1] - rptr[i];
535         if (rownz_max < ncols) rownz_max = ncols;
536       }
537     }
538 
539     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
540     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
541     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
542     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
543     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
544   } else {
545     C = *matredundant;
546   }
547 
548   /* insert local matrix entries */
549   rptr = sbuf_j;
550   cols = sbuf_j + rend-rstart + 1;
551   vals = sbuf_a;
552   for (i=0; i<rend-rstart; i++){
553     row   = i + rstart;
554     ncols = rptr[i+1] - rptr[i];
555     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
556     vals += ncols;
557     cols += ncols;
558   }
559   /* insert received matrix entries */
560   for (imdex=0; imdex<nrecvs; imdex++){
561     rstart = rowrange[recv_rank[imdex]];
562     rend   = rowrange[recv_rank[imdex]+1];
563     rptr = rbuf_j[imdex];
564     cols = rbuf_j[imdex] + rend-rstart + 1;
565     vals = rbuf_a[imdex];
566     for (i=0; i<rend-rstart; i++){
567       row   = i + rstart;
568       ncols = rptr[i+1] - rptr[i];
569       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
570       vals += ncols;
571       cols += ncols;
572     }
573   }
574   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
575   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
576   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
577   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
578   if (reuse == MAT_INITIAL_MATRIX){
579     PetscObjectContainer container;
580     *matredundant = C;
581     /* create a supporting struct and attach it to C for reuse */
582     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
583     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
584     ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr);
585     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
586     ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr);
587 
588     redund->nzlocal = nzlocal;
589     redund->nsends  = nsends;
590     redund->nrecvs  = nrecvs;
591     redund->send_rank = send_rank;
592     redund->sbuf_nz = sbuf_nz;
593     redund->sbuf_j  = sbuf_j;
594     redund->sbuf_a  = sbuf_a;
595     redund->rbuf_j  = rbuf_j;
596     redund->rbuf_a  = rbuf_a;
597 
598     redund->MatDestroy = C->ops->destroy;
599     C->ops->destroy    = MatDestroy_MatRedundant;
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "PCSetUp_Redundant"
606 static PetscErrorCode PCSetUp_Redundant(PC pc)
607 {
608   PC_Redundant   *red  = (PC_Redundant*)pc->data;
609   PetscErrorCode ierr;
610   PetscInt       mstart,mend,mlocal,m;
611   PetscMPIInt    size;
612   MatReuse       reuse = MAT_INITIAL_MATRIX;
613   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
614   MPI_Comm       comm;
615   Vec            vec;
616 
617   PetscInt       mlocal_sub;
618   PetscMPIInt    subsize,subrank;
619   PetscInt       rstart_sub,rend_sub,mloc_sub;
620 
621   PetscFunctionBegin;
622   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
623   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
624   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
625   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
626   if (!pc->setupcalled) {
627     /* create working vectors xsub/ysub and xdup/ydup */
628     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
629     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
630 
631     /* get local size of xsub/ysub */
632     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
633     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
634     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
635     if (subrank+1 < subsize){
636       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
637     } else {
638       rend_sub = m;
639     }
640     mloc_sub = rend_sub - rstart_sub;
641     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
642     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
643     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
644 
645     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be placed into it.
646        Note: we use communicator dupcomm, not pc->comm! */
647     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
648     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
649 
650     /* create vec scatters */
651     if (!red->scatterin){
652       IS       is1,is2;
653       PetscInt *idx1,*idx2,i,j,k;
654       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
655       idx2 = idx1 + red->nsubcomm*mlocal;
656       j = 0;
657       for (k=0; k<red->nsubcomm; k++){
658         for (i=mstart; i<mend; i++){
659           idx1[j]   = i;
660           idx2[j++] = i + m*k;
661         }
662       }
663       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
664       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
665       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
666       ierr = ISDestroy(is1);CHKERRQ(ierr);
667       ierr = ISDestroy(is2);CHKERRQ(ierr);
668 
669       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
670       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
671       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
672       ierr = ISDestroy(is1);CHKERRQ(ierr);
673       ierr = ISDestroy(is2);CHKERRQ(ierr);
674       ierr = PetscFree(idx1);CHKERRQ(ierr);
675     }
676   }
677   ierr = VecDestroy(vec);CHKERRQ(ierr);
678 
679   /* if the preconditioner matrix provided by the user is sequential then we do not need to gather the parallel matrix */
680   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
681   if (size == 1) {
682     red->useparallelmat = PETSC_FALSE;
683   }
684 
685   if (red->useparallelmat) {
686     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
687       /* destroy old matrices */
688       if (red->pmats) {
689         ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
690       }
691     } else if (pc->setupcalled == 1) {
692       reuse = MAT_REUSE_MATRIX;
693       str   = SAME_NONZERO_PATTERN;
694     }
695 
696     /* grab the parallel matrix and put it into the processes of a subcommunicator */
697     /*--------------------------------------------------------------------------*/
698     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
699     ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr);
700     /* tell PC of the subcommunicator its operators */
701     ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr);
702   } else {
703     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
704   }
705   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
706   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "PCApply_Redundant"
712 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
713 {
714   PC_Redundant   *red = (PC_Redundant*)pc->data;
715   PetscErrorCode ierr;
716   PetscScalar    *array;
717 
718   PetscFunctionBegin;
719   /* scatter x to xdup */
720   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
721   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
722 
723   /* place xdup's local array into xsub */
724   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
725   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
726 
727   /* apply preconditioner on each processor */
728   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
729   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
730   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
731 
732   /* place ysub's local array into ydup */
733   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
734   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
735 
736   /* scatter ydup to y */
737   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
738   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
739   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
740   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "PCDestroy_Redundant"
746 static PetscErrorCode PCDestroy_Redundant(PC pc)
747 {
748   PC_Redundant   *red = (PC_Redundant*)pc->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
753   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
754   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
755   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
756   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
757   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
758   if (red->pmats) {
759     ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
760   }
761 
762 
763   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
      ierr = MPI_Comm_free(&red->subcomm);CHKERRQ(ierr); /* free the communicators created in PCCreate_Redundant() */
      ierr = MPI_Comm_free(&red->dupcomm);CHKERRQ(ierr);
764   ierr = PetscFree(red);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
768 #undef __FUNCT__
769 #define __FUNCT__ "PCSetFromOptions_Redundant"
770 static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
771 {
772   PetscFunctionBegin;
773   PetscFunctionReturn(0);
774 }
775 
776 EXTERN_C_BEGIN
777 #undef __FUNCT__
778 #define __FUNCT__ "PCRedundantSetScatter_Redundant"
779 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
780 {
781   PC_Redundant   *red = (PC_Redundant*)pc->data;
782   PetscErrorCode ierr;
783 
784   PetscFunctionBegin;
785   red->scatterin  = in;
786   red->scatterout = out;
787   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
788   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
789   PetscFunctionReturn(0);
790 }
791 EXTERN_C_END
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "PCRedundantSetScatter"
795 /*@
796    PCRedundantSetScatter - Sets the scatter used to copy values into the
797      redundant local solve and the scatter to move them back into the global
798      vector.
799 
800    Collective on PC
801 
802    Input Parameters:
803 +  pc - the preconditioner context
804 .  in - the scatter to move the values in
805 -  out - the scatter to move them out
806 
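
   Example usage (a minimal sketch; the scatters in/out are created by the caller,
   e.g. with VecScatterCreate(), and must have the layouts that PCApply_Redundant()
   expects, i.e. global vector to xdup and back):

      ierr = PCSetType(pc,PCREDUNDANT);CHKERRQ(ierr);
      ierr = PCRedundantSetScatter(pc,in,out);CHKERRQ(ierr);
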
807    Level: advanced
808 
809 .keywords: PC, redundant solve
810 @*/
811 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
812 {
813   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
814 
815   PetscFunctionBegin;
816   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
817   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
818   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
819   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
820   if (f) {
821     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 EXTERN_C_BEGIN
827 #undef __FUNCT__
828 #define __FUNCT__ "PCRedundantGetPC_Redundant"
829 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
830 {
831   PC_Redundant *red = (PC_Redundant*)pc->data;
832 
833   PetscFunctionBegin;
834   *innerpc = red->pc;
835   PetscFunctionReturn(0);
836 }
837 EXTERN_C_END
838 
839 #undef __FUNCT__
840 #define __FUNCT__ "PCRedundantGetPC"
841 /*@
842    PCRedundantGetPC - Gets the PC created by the redundant PC on each subcommunicator.
843 
844    Not Collective
845 
846    Input Parameter:
847 .  pc - the preconditioner context
848 
849    Output Parameter:
850 .  innerpc - the PC defined on the subcommunicator
851 
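   Example usage (a minimal sketch): retrieve the inner PC to configure it in code
   rather than through the -redundant_ options prefix:

      PC innerpc;
      ierr = PCRedundantGetPC(pc,&innerpc);CHKERRQ(ierr);
      ierr = PCSetType(innerpc,PCILU);CHKERRQ(ierr);
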
852    Level: advanced
853 
854 .keywords: PC, redundant solve
855 @*/
856 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
857 {
858   PetscErrorCode ierr,(*f)(PC,PC*);
859 
860   PetscFunctionBegin;
861   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
862   PetscValidPointer(innerpc,2);
863   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
864   if (f) {
865     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
866   }
867   PetscFunctionReturn(0);
868 }
869 
870 EXTERN_C_BEGIN
871 #undef __FUNCT__
872 #define __FUNCT__ "PCRedundantGetOperators_Redundant"
873 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
874 {
875   PC_Redundant *red = (PC_Redundant*)pc->data;
876 
877   PetscFunctionBegin;
878   if (mat)  *mat  = red->pmats;
879   if (pmat) *pmat = red->pmats;
880   PetscFunctionReturn(0);
881 }
882 EXTERN_C_END
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "PCRedundantGetOperators"
886 /*@
887    PCRedundantGetOperators - gets the redundant matrix and preconditioner matrix gathered on the subcommunicator
888 
889    Not Collective
890 
891    Input Parameter:
892 .  pc - the preconditioner context
893 
894    Output Parameters:
895 +  mat - the matrix
896 -  pmat - the (possibly different) preconditioner matrix
897 
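   Example usage (a sketch; the gathered matrices exist only after PCSetUp() has been called):

      Mat Ared;
      ierr = PCRedundantGetOperators(pc,&Ared,PETSC_NULL);CHKERRQ(ierr); /* Ared lives on the subcommunicator */
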
898    Level: advanced
899 
900 .keywords: PC, redundant solve
901 @*/
902 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
903 {
904   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
905 
906   PetscFunctionBegin;
907   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
908   if (mat)  PetscValidPointer(mat,2);
909   if (pmat) PetscValidPointer(pmat,3);
910   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
911   if (f) {
912     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
913   }
914   PetscFunctionReturn(0);
915 }
916 
917 /* -------------------------------------------------------------------------------------*/
918 /*MC
919      PCREDUNDANT - Runs a preconditioner for the entire problem redundantly on each subgroup of processes (subcommunicator)
920 
921 
922      Options for the redundant preconditioner can be set with -redundant_pc_xxx
923 
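   Options Database Keys (as read by this implementation):
+  -nsubcomm <n> - number of subcommunicators (defaults to the number of processes, i.e. one redundant solve per process)
-  -redundant_pc_type <type> - type of the inner preconditioner (defaults to lu)

   Example usage (a sketch):

      mpiexec -n 4 ./app -ksp_type preonly -pc_type redundant -nsubcomm 2 -redundant_pc_type lu

   With -nsubcomm 2, the four processes are split into two subcommunicators, and each
   subcommunicator gathers and solves the entire system with its own LU factorization.
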
924    Level: intermediate
925 
926 
927 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
928            PCRedundantGetPC(), PCRedundantGetOperators()
929 
930 M*/
931 
932 EXTERN_C_BEGIN
933 #undef __FUNCT__
934 #define __FUNCT__ "PCCreate_Redundant"
935 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
936 {
937   PetscErrorCode ierr;
938   PC_Redundant   *red;
939   const char     *prefix;
940   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
941   PetscMPIInt    rank,size,subrank,*subsize,duprank;
942   MPI_Comm       subcomm=0,dupcomm=0;
943 
944   PetscFunctionBegin;
945   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
946   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
947   red->useparallelmat   = PETSC_TRUE;
948 
949   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
950   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
951   nsubcomm = size;
952   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
953   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
954 
955   /*--------------------------------------------------------------------------------------------------
956     To avoid scattering data from the subcommunicators back to the original communicator, we create the
957     subcommunicators by assigning processes to them in a cyclic (round-robin) fashion.
958     An example: size=4, nsubcomm=3
959      pc->comm:
960       rank:     [0]  [1]  [2]  [3]
961       color:     0    1    2    0
962 
963      subcomm:
964       subrank:  [0]  [0]  [0]  [1]
965 
966      dupcomm:
967       duprank:  [0]  [2]  [3]  [1]
968 
969      Here, subcomm[color = 0] has subsize=2, owns processes [0] and [3]
970            subcomm[color = 1] has subsize=1, owns process [1]
971            subcomm[color = 2] has subsize=1, owns process [2]
972           dupcomm has the same number of processes as pc->comm, and its duprank maps
973           the processes of the subcommunicators contiguously into a 1d array:
974             duprank: [0] [1]      [2]         [3]
975             rank:    [0] [3]      [1]         [2]
976                     subcomm[0] subcomm[1]  subcomm[2]
977    ----------------------------------------------------------------------------------------*/
978 
979   /* get the size of each subcommunicator */
980   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
981   np_subcomm = size/nsubcomm;
982   nleftover  = size - nsubcomm*np_subcomm;
983   for (i=0; i<nsubcomm; i++){
984     subsize[i] = np_subcomm;
985     if (i<nleftover) subsize[i]++;
986   }
987 
988   /* find color for this proc */
989   color   = rank%nsubcomm;
990   subrank = rank/nsubcomm;
991 
992   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
993   red->subcomm  = subcomm;
994   red->color    = color;
995   red->nsubcomm = nsubcomm;
996 
997   j = 0; duprank = 0;
998   for (i=0; i<nsubcomm; i++){
999     if (j == color){
1000       duprank += subrank;
1001       break;
1002     }
1003     duprank += subsize[i]; j++;
1004   }
1005 
1006   /* create dupcomm with the same size as comm, but with rank duprank, which maps the subcommunicators contiguously into dupcomm */
1007   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
1008   red->dupcomm = dupcomm;
1009   ierr = PetscFree(subsize);CHKERRQ(ierr);
1010   /* if (rank == 0) printf("[%d] subrank %d, duprank: %d\n",rank,subrank,duprank); */
1011 
1012   /* create the PC of the subcommunicator; each subgroup of processes gets its own copy */
1013   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
1014   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
1015   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
1016   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
1017   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
1018 
1019   pc->ops->apply             = PCApply_Redundant;
1020   pc->ops->applytranspose    = 0;
1021   pc->ops->setup             = PCSetUp_Redundant;
1022   pc->ops->destroy           = PCDestroy_Redundant;
1023   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
1024   pc->ops->setuponblocks     = 0;
1025   pc->ops->view              = PCView_Redundant;
1026   pc->ops->applyrichardson   = 0;
1027 
1028   pc->data     = (void*)red;
1029 
1030   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
1031                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
1032   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
1033                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
1034   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
1035                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
1036 
1037   PetscFunctionReturn(0);
1038 }
1039 EXTERN_C_END
1040