xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision b3804887f4a4eb3efd03445267ed7f92fe61abdd)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner.
5 */
6 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
7 #include "petscksp.h"
8 
9 typedef struct {
10   PC         pc;                   /* actual preconditioner used on each processor */
11   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
12   Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
13   Mat        pmats;                /* matrix and optional preconditioner matrix belong to a subcommunicator */
14   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
15   PetscTruth useparallelmat;
16   MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
17   MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
18                                       that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
19   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
20   PetscInt   color;                /* color of processors in a subcommunicator */
21 } PC_Redundant;
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "PCView_Redundant"
25 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
26 {
27   PC_Redundant   *red = (PC_Redundant*)pc->data;
28   PetscErrorCode ierr;
29   PetscMPIInt    rank;
30   PetscTruth     iascii,isstring;
31   PetscViewer    sviewer;
32 
33   PetscFunctionBegin;
34   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
35   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
36   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
37   if (iascii) {
38     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
39     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
40     if (!rank) {
41       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
42       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
43       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
44     }
45     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
46   } else if (isstring) {
47     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
48     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
49     if (!rank) {
50       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
51     }
52     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
53   } else {
54     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
60 #include "private/vecimpl.h"
61 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
62 #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
63 
64 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
65   PetscInt       nzlocal,nsends,nrecvs;
66   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
67   PetscScalar    *sbuf_a,**rbuf_a;
68   PetscErrorCode (*MatDestroy)(Mat);
69 } Mat_Redundant;
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant"
73 PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr)
74 {
75   PetscErrorCode       ierr;
76   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
77   PetscInt             i;
78 
79   PetscFunctionBegin;
80   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
81   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
82   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
83   for (i=0; i<redund->nrecvs; i++){
84     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
85     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
86   }
87   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
88   ierr = PetscFree(redund);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 #undef __FUNCT__
93 #define __FUNCT__ "MatDestroy_MatRedundant"
94 PetscErrorCode MatDestroy_MatRedundant(Mat A)
95 {
96   PetscErrorCode       ierr;
97   PetscObjectContainer container;
98   Mat_Redundant        *redund=PETSC_NULL;
99 
100   PetscFunctionBegin;
101   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
102   if (container) {
103     ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
104   } else {
105     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
106   }
107   A->ops->destroy = redund->MatDestroy;
108   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
109   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
110   ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "MatGetRedundantMatrix"
116 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
117 {
118   PetscMPIInt    rank,size;
119   MPI_Comm       comm=mat->comm;
120   PetscErrorCode ierr;
121   PetscInt       nsends,nrecvs,i,prid=100,rownz_max;
122   PetscMPIInt    *send_rank,*recv_rank;
123   PetscInt       *rowrange=mat->rmap.range;
124   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
125   Mat            A=aij->A,B=aij->B,C=*matredundant;
126   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
127   PetscScalar    *sbuf_a;
128   PetscInt       nzlocal=a->nz+b->nz;
129   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
130   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
131   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
132   PetscScalar    *vals,*aworkA,*aworkB;
133   PetscMPIInt    tag1,tag2,tag3,imdex;
134   MPI_Request    *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3;
135   MPI_Status     recv_status,*send_status;
136   PetscInt       *sbuf_nz,*rbuf_nz,count;
137   PetscInt       **rbuf_j;
138   PetscScalar    **rbuf_a;
139   Mat_Redundant  *redund=PETSC_NULL;
140   PetscObjectContainer container;
141 
142   PetscFunctionBegin;
143   ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr);
144   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
145   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
146 
147   if (reuse == MAT_REUSE_MATRIX) {
148     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
149     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
150     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
151     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
152     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
153     if (container) {
154       ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
155     } else {
156       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
157     }
158     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
159 
160     nsends    = redund->nsends;
161     nrecvs    = redund->nrecvs;
162     send_rank = redund->send_rank; recv_rank = send_rank + size;
163     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
164     sbuf_j    = redund->sbuf_j;
165     sbuf_a    = redund->sbuf_a;
166     rbuf_j    = redund->rbuf_j;
167     rbuf_a    = redund->rbuf_a;
168   }
169 
170   if (reuse == MAT_INITIAL_MATRIX){
171     PetscMPIInt  subrank,subsize;
172     PetscInt     nleftover,np_subcomm;
173     /* get the destination processors' id send_rank, nsends and nrecvs */
174     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
175     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
176     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
177     recv_rank = send_rank + size;
178     np_subcomm = size/nsubcomm;
179     nleftover  = size - nsubcomm*np_subcomm;
180     nsends = 0; nrecvs = 0;
181     for (i=0; i<size; i++){ /* i=rank*/
182       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
183         send_rank[nsends] = i; nsends++;
184         recv_rank[nrecvs++] = i;
185       }
186     }
187     if (rank >= size - nleftover){/* this proc is a leftover processor */
188       i = size-nleftover-1;
189       j = 0;
190       while (j < nsubcomm - nleftover){
191         send_rank[nsends++] = i;
192         i--; j++;
193       }
194     }
195 
196     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
197       for (i=0; i<nleftover; i++){
198         recv_rank[nrecvs++] = size-nleftover+i;
199       }
200     }
201 
202     /* allocate sbuf_j, sbuf_a */
203     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
204     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
205     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
206   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
207 
208   /* copy mat's local entries into the buffers */
209   if (reuse == MAT_INITIAL_MATRIX){
210     rownz_max = 0;
211     rptr = sbuf_j;
212     cols = sbuf_j + rend-rstart + 1;
213     vals = sbuf_a;
214     rptr[0] = 0;
215     for (i=0; i<rend-rstart; i++){
216       row = i + rstart;
217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
218       ncols  = nzA + nzB;
219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
221       /* load the column indices for this row into cols */
222       lwrite = 0;
223       for (l=0; l<nzB; l++) {
224         if ((ctmp = bmap[cworkB[l]]) < cstart){
225           vals[lwrite]   = aworkB[l];
226           cols[lwrite++] = ctmp;
227         }
228       }
229       for (l=0; l<nzA; l++){
230         vals[lwrite]   = aworkA[l];
231         cols[lwrite++] = cstart + cworkA[l];
232       }
233       for (l=0; l<nzB; l++) {
234         if ((ctmp = bmap[cworkB[l]]) >= cend){
235           vals[lwrite]   = aworkB[l];
236           cols[lwrite++] = ctmp;
237         }
238       }
239       vals += ncols;
240       cols += ncols;
241       rptr[i+1] = rptr[i] + ncols;
242       if (rownz_max < ncols) rownz_max = ncols;
243     }
244     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
245   } else { /* only copy matrix values into sbuf_a */
246     rptr = sbuf_j;
247     vals = sbuf_a;
248     rptr[0] = 0;
249     for (i=0; i<rend-rstart; i++){
250       row = i + rstart;
251       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
252       ncols  = nzA + nzB;
253       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
254       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
255       lwrite = 0;
256       for (l=0; l<nzB; l++) {
257         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
258       }
259       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
260       for (l=0; l<nzB; l++) {
261         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
262       }
263       vals += ncols;
264       rptr[i+1] = rptr[i] + ncols;
265     }
266   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
267 
268   /* send nzlocal to others, and recv other's nzlocal */
269   /*--------------------------------------------------*/
270   if (reuse == MAT_INITIAL_MATRIX){
271     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
272     s_waits2 = s_waits3 + nsends;
273     s_waits1 = s_waits2 + nsends;
274     r_waits1 = s_waits1 + nsends;
275     r_waits2 = r_waits1 + nrecvs;
276     r_waits3 = r_waits2 + nrecvs;
277   } else {
278     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
279     r_waits3 = s_waits3 + nsends;
280   }
281 
282   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
283   if (reuse == MAT_INITIAL_MATRIX){
284     /* get new tags to keep the communication clean */
285     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
286     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
287     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
288     rbuf_nz = sbuf_nz + nsends;
289 
290     /* post receives of other's nzlocal */
291     for (i=0; i<nrecvs; i++){
292       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
293     }
294     /* send nzlocal to others */
295     for (i=0; i<nsends; i++){
296       sbuf_nz[i] = nzlocal;
297       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
298     }
299     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
300     count = nrecvs;
301     while (count) {
302       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
303       recv_rank[imdex] = recv_status.MPI_SOURCE;
304       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
305       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
306 
307       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
308       rbuf_nz[imdex] += i + 2;
309       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
310       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
311       count--;
312     }
313     /* wait on sends of nzlocal */
314     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
315     /* send mat->i,j to others, and recv from other's */
316     /*------------------------------------------------*/
317     for (i=0; i<nsends; i++){
318       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
319       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
320     }
321     /* wait on receives of mat->i,j */
322     /*------------------------------*/
323     count = nrecvs;
324     while (count) {
325       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
326       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
327       count--;
328     }
329     /* wait on sends of mat->i,j */
330     /*---------------------------*/
331     if (nsends) {
332       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
333     }
334   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
335 
336   /* post receives, send and receive mat->a */
337   /*----------------------------------------*/
338   for (imdex=0; imdex<nrecvs; imdex++) {
339     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
340   }
341   for (i=0; i<nsends; i++){
342     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
343   }
344   count = nrecvs;
345   while (count) {
346     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
347     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
348     count--;
349   }
350   if (nsends) {
351     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
352   }
353 
354   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
355 
356   /* create redundant matrix */
357   /*-------------------------*/
358   if (reuse == MAT_INITIAL_MATRIX){
359     /* compute rownz_max for preallocation */
360     for (imdex=0; imdex<nrecvs; imdex++){
361       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
362       rptr = rbuf_j[imdex];
363       for (i=0; i<j; i++){
364         ncols = rptr[i+1] - rptr[i];
365         if (rownz_max < ncols) rownz_max = ncols;
366       }
367     }
368 
369     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
370     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
371     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
372     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
373     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
374   } else {
375     C = *matredundant;
376   }
377 
378   /* insert local matrix entries */
379   rptr = sbuf_j;
380   cols = sbuf_j + rend-rstart + 1;
381   vals = sbuf_a;
382   for (i=0; i<rend-rstart; i++){
383     row   = i + rstart;
384     ncols = rptr[i+1] - rptr[i];
385     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
386     vals += ncols;
387     cols += ncols;
388   }
389   /* insert received matrix entries */
390   for (imdex=0; imdex<nrecvs; imdex++){
391     rstart = rowrange[recv_rank[imdex]];
392     rend   = rowrange[recv_rank[imdex]+1];
393     rptr = rbuf_j[imdex];
394     cols = rbuf_j[imdex] + rend-rstart + 1;
395     vals = rbuf_a[imdex];
396     for (i=0; i<rend-rstart; i++){
397       row   = i + rstart;
398       ncols = rptr[i+1] - rptr[i];
399       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
400       vals += ncols;
401       cols += ncols;
402     }
403   }
404   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
405   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
406   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
407   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
408   if (reuse == MAT_INITIAL_MATRIX){
409     PetscObjectContainer container;
410     *matredundant = C;
411     /* create a supporting struct and attach it to C for reuse */
412     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
413     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
414     ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr);
415     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
416     ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr);
417 
418     redund->nzlocal = nzlocal;
419     redund->nsends  = nsends;
420     redund->nrecvs  = nrecvs;
421     redund->send_rank = send_rank;
422     redund->sbuf_nz = sbuf_nz;
423     redund->sbuf_j  = sbuf_j;
424     redund->sbuf_a  = sbuf_a;
425     redund->rbuf_j  = rbuf_j;
426     redund->rbuf_a  = rbuf_a;
427 
428     redund->MatDestroy = C->ops->destroy;
429     C->ops->destroy    = MatDestroy_MatRedundant;
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "PCSetUp_Redundant"
436 static PetscErrorCode PCSetUp_Redundant(PC pc)
437 {
438   PC_Redundant   *red  = (PC_Redundant*)pc->data;
439   PetscErrorCode ierr;
440   PetscInt       mstart,mend,mlocal,m;
441   PetscMPIInt    size;
442   IS             isl;
443   MatReuse       reuse = MAT_INITIAL_MATRIX;
444   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
445   MPI_Comm       comm;
446   Vec            vec;
447 
448   PetscInt    mlocal_sub;
449   PetscMPIInt subsize,subrank;
450   PetscInt    rstart_sub,rend_sub,mloc_sub;
451 
452   PetscFunctionBegin;
453   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
454   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
455   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
456   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
457   if (!pc->setupcalled) {
458     /* create working vectors xsub/ysub and xdup/ydup */
459     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
460     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
461 
462     /* get local size of xsub/ysub */
463     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
464     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
465     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
466     if (subrank+1 < subsize){
467       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
468     } else {
469       rend_sub = m;
470     }
471     mloc_sub = rend_sub - rstart_sub;
472     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
473     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
474     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
475 
476     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
477        Note: we use communicator dupcomm, not pc->comm! */
478     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
479     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
480 
481     /* create vec scatters */
482     if (!red->scatterin){
483       IS       is1,is2;
484       PetscInt *idx1,*idx2,i,j,k;
485       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
486       idx2 = idx1 + red->nsubcomm*mlocal;
487       j = 0;
488       for (k=0; k<red->nsubcomm; k++){
489         for (i=mstart; i<mend; i++){
490           idx1[j]   = i;
491           idx2[j++] = i + m*k;
492         }
493       }
494       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
495       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
496       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
497       ierr = ISDestroy(is1);CHKERRQ(ierr);
498       ierr = ISDestroy(is2);CHKERRQ(ierr);
499 
500       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
501       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
502       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
503       ierr = ISDestroy(is1);CHKERRQ(ierr);
504       ierr = ISDestroy(is2);CHKERRQ(ierr);
505       ierr = PetscFree(idx1);CHKERRQ(ierr);
506     }
507   }
508   ierr = VecDestroy(vec);CHKERRQ(ierr);
509 
510   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
511   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
512   if (size == 1) {
513     red->useparallelmat = PETSC_FALSE;
514   }
515 
516   if (red->useparallelmat) {
517     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
518       /* destroy old matrices */
519       if (red->pmats) {
520         //ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
521         ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
522       }
523     } else if (pc->setupcalled == 1) {
524       reuse = MAT_REUSE_MATRIX;
525       str   = SAME_NONZERO_PATTERN;
526     }
527 
528     /* grab the parallel matrix and put it into processors of a subcomminicator */
529     /*--------------------------------------------------------------------------*/
530     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
531     ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr);
532     /* tell PC of the subcommunicator its operators */
533     ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr);
534   } else {
535     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
536   }
537   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
538   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "PCApply_Redundant"
544 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
545 {
546   PC_Redundant   *red = (PC_Redundant*)pc->data;
547   PetscErrorCode ierr;
548   PetscScalar    *array;
549 
550   PetscFunctionBegin;
551   /* scatter x to xdup */
552   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
553   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
554 
555   /* place xdup's local array into xsub */
556   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
557   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
558 
559   /* apply preconditioner on each processor */
560   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
561   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
562   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
563 
564   /* place ysub's local array into ydup */
565   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
566   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
567 
568   /* scatter ydup to y */
569   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
570   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
571   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
572   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "PCDestroy_Redundant"
578 static PetscErrorCode PCDestroy_Redundant(PC pc)
579 {
580   PC_Redundant   *red = (PC_Redundant*)pc->data;
581   PetscErrorCode ierr;
582 
583   PetscFunctionBegin;
584   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
585   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
586   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
587   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
588   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
589   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
590   if (red->pmats) {
591     ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
592   }
593 
594 
595   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
596   ierr = PetscFree(red);CHKERRQ(ierr);
597   PetscFunctionReturn(0);
598 }
599 
600 #undef __FUNCT__
601 #define __FUNCT__ "PCSetFromOptions_Redundant"
602 static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
603 {
604   PetscFunctionBegin;
605   PetscFunctionReturn(0);
606 }
607 
608 EXTERN_C_BEGIN
609 #undef __FUNCT__
610 #define __FUNCT__ "PCRedundantSetScatter_Redundant"
611 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
612 {
613   PC_Redundant   *red = (PC_Redundant*)pc->data;
614   PetscErrorCode ierr;
615 
616   PetscFunctionBegin;
617   red->scatterin  = in;
618   red->scatterout = out;
619   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
620   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 EXTERN_C_END
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "PCRedundantSetScatter"
627 /*@
628    PCRedundantSetScatter - Sets the scatter used to copy values into the
629      redundant local solve and the scatter to move them back into the global
630      vector.
631 
632    Collective on PC
633 
634    Input Parameters:
635 +  pc - the preconditioner context
636 .  in - the scatter to move the values in
637 -  out - the scatter to move them out
638 
639    Level: advanced
640 
641 .keywords: PC, redundant solve
642 @*/
643 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
644 {
645   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
646 
647   PetscFunctionBegin;
648   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
649   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
650   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
651   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
652   if (f) {
653     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
654   }
655   PetscFunctionReturn(0);
656 }
657 
658 EXTERN_C_BEGIN
659 #undef __FUNCT__
660 #define __FUNCT__ "PCRedundantGetPC_Redundant"
661 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
662 {
663   PC_Redundant *red = (PC_Redundant*)pc->data;
664 
665   PetscFunctionBegin;
666   *innerpc = red->pc;
667   PetscFunctionReturn(0);
668 }
669 EXTERN_C_END
670 
671 #undef __FUNCT__
672 #define __FUNCT__ "PCRedundantGetPC"
673 /*@
674    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
675 
676    Not Collective
677 
678    Input Parameter:
679 .  pc - the preconditioner context
680 
681    Output Parameter:
682 .  innerpc - the sequential PC
683 
684    Level: advanced
685 
686 .keywords: PC, redundant solve
687 @*/
688 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
689 {
690   PetscErrorCode ierr,(*f)(PC,PC*);
691 
692   PetscFunctionBegin;
693   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
694   PetscValidPointer(innerpc,2);
695   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
696   if (f) {
697     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
698   }
699   PetscFunctionReturn(0);
700 }
701 
702 EXTERN_C_BEGIN
703 #undef __FUNCT__
704 #define __FUNCT__ "PCRedundantGetOperators_Redundant"
705 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
706 {
707   PC_Redundant *red = (PC_Redundant*)pc->data;
708 
709   PetscFunctionBegin;
710   if (mat)  *mat  = red->pmats;
711   if (pmat) *pmat = red->pmats;
712   PetscFunctionReturn(0);
713 }
714 EXTERN_C_END
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "PCRedundantGetOperators"
718 /*@
719    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
720 
721    Not Collective
722 
723    Input Parameter:
724 .  pc - the preconditioner context
725 
726    Output Parameters:
727 +  mat - the matrix
728 -  pmat - the (possibly different) preconditioner matrix
729 
730    Level: advanced
731 
732 .keywords: PC, redundant solve
733 @*/
734 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
735 {
736   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
737 
738   PetscFunctionBegin;
739   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
740   if (mat)  PetscValidPointer(mat,2);
741   if (pmat) PetscValidPointer(pmat,3);
742   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
743   if (f) {
744     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
745   }
746   PetscFunctionReturn(0);
747 }
748 
749 /* -------------------------------------------------------------------------------------*/
750 /*MC
751      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor
752 
753 
754      Options for the redundant preconditioners can be set with -redundant_pc_xxx
755 
756    Level: intermediate
757 
758 
759 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
760            PCRedundantGetPC(), PCRedundantGetOperators()
761 
762 M*/
763 
764 EXTERN_C_BEGIN
765 #undef __FUNCT__
766 #define __FUNCT__ "PCCreate_Redundant"
767 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
768 {
769   PetscErrorCode ierr;
770   PC_Redundant   *red;
771   const char     *prefix;
772 
773   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
774   PetscMPIInt    rank,size,subrank,*subsize;
775   MPI_Comm       subcomm;
776   PetscMPIInt    duprank;
777   PetscMPIInt    rank_dup,size_dup;
778   MPI_Comm       dupcomm;
779 
780   PetscFunctionBegin;
781   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
782   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
783   red->useparallelmat   = PETSC_TRUE;
784 
785   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
786   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
787   nsubcomm = size;
788   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
789   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
790 
791   /* get size of each subcommunicators */
792   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
793   np_subcomm = size/nsubcomm;
794   nleftover  = size - nsubcomm*np_subcomm;
795   for (i=0; i<nsubcomm; i++){
796     subsize[i] = np_subcomm;
797     if (i<nleftover) subsize[i]++;
798   }
799 
800   /* find color for this proc */
801   color   = rank%nsubcomm;
802   subrank = rank/nsubcomm;
803 
804   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
805   red->subcomm  = subcomm;
806   red->color    = color;
807   red->nsubcomm = nsubcomm;
808 
809   j = 0; duprank = 0;
810   for (i=0; i<nsubcomm; i++){
811     if (j == color){
812       duprank += subrank;
813       break;
814     }
815     duprank += subsize[i]; j++;
816   }
817   /*
818   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank);
819   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
820   */
821 
822   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
823   ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr);
824   ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr);
825   /*
826   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank);
827   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
828   */
829   red->dupcomm = dupcomm;
830   ierr = PetscFree(subsize);CHKERRQ(ierr);
831 
832   /* create the sequential PC that each processor has copy of */
833   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
834   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
835   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
836   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
837   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
838 
839   pc->ops->apply             = PCApply_Redundant;
840   pc->ops->applytranspose    = 0;
841   pc->ops->setup             = PCSetUp_Redundant;
842   pc->ops->destroy           = PCDestroy_Redundant;
843   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
844   pc->ops->setuponblocks     = 0;
845   pc->ops->view              = PCView_Redundant;
846   pc->ops->applyrichardson   = 0;
847 
848   pc->data     = (void*)red;
849 
850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
851                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
853                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
855                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
856 
857   PetscFunctionReturn(0);
858 }
859 EXTERN_C_END
860