xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision 64260623127d6f1ffcea0aca5d0d4512e7d82dc5)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner.
5 */
6 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
7 #include "petscksp.h"
8 
9 typedef struct {
10   PC         pc;                   /* actual preconditioner used on each processor */
11   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
12   Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
13   Mat        pmats;                /* matrix and optional preconditioner matrix belong to a subcommunicator */
14   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
15   PetscTruth useparallelmat;
16   MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
17   MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
18                                       that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
19   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
20   PetscInt   color;                /* color of processors in a subcommunicator */
21 } PC_Redundant;
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "PCView_Redundant"
25 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
26 {
27   PC_Redundant   *red = (PC_Redundant*)pc->data;
28   PetscErrorCode ierr;
29   PetscMPIInt    rank;
30   PetscTruth     iascii,isstring;
31   PetscViewer    sviewer;
32 
33   PetscFunctionBegin;
34   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
35   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
36   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
37   if (iascii) {
38     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
39     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
40     if (!rank) {
41       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
42       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
43       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
44     }
45     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
46   } else if (isstring) {
47     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
48     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
49     if (!rank) {
50       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
51     }
52     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
53   } else {
54     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
60 #include "private/vecimpl.h"
61 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
62 #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
63 
64 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
65   PetscInt       nzlocal,nsends,nrecvs;
66   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
67   PetscScalar    *sbuf_a,**rbuf_a;
68   PetscErrorCode (*MatDestroy)(Mat);
69 } Mat_Redundant;
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant"
73 PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr)
74 {
75   PetscErrorCode       ierr;
76   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
77   PetscInt             i;
78 
79   PetscFunctionBegin;
80   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
81   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
82   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
83   for (i=0; i<redund->nrecvs; i++){
84     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
85     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
86   }
87   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
88   ierr = PetscFree(redund);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 #undef __FUNCT__
93 #define __FUNCT__ "MatDestroy_MatRedundant"
94 PetscErrorCode MatDestroy_MatRedundant(Mat A)
95 {
96   PetscErrorCode       ierr;
97   PetscObjectContainer container;
98   Mat_Redundant        *redund=PETSC_NULL;
99 
100   PetscFunctionBegin;
101   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
102   if (container) {
103     ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
104   } else {
105     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
106   }
107   A->ops->destroy = redund->MatDestroy;
108   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
109   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
110   ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "MatGetRedundantMatrix"
116 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
117 {
118   PetscMPIInt    rank,size;
119   MPI_Comm       comm=mat->comm;
120   PetscErrorCode ierr;
121   PetscInt       nsends,nrecvs,i,prid=100,rownz_max;
122   PetscMPIInt    *send_rank,*recv_rank;
123   PetscInt       *rowrange=mat->rmap.range;
124   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
125   Mat            A=aij->A,B=aij->B,C=*matredundant;
126   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
127   PetscScalar    *sbuf_a;
128   PetscInt       nzlocal=a->nz+b->nz;
129   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
130   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
131   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
132   PetscScalar    *vals,*aworkA,*aworkB;
133   PetscMPIInt    tag1,tag2,tag3,imdex;
134   MPI_Request    *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3;
135   MPI_Status     recv_status,*send_status;
136   PetscInt       *sbuf_nz,*rbuf_nz,count;
137   PetscInt       **rbuf_j;
138   PetscScalar    **rbuf_a;
139   Mat_Redundant  *redund=PETSC_NULL;
140   PetscObjectContainer container;
141 
142   PetscFunctionBegin;
143   ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr);
144   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
145   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
146 
147   if (reuse == MAT_REUSE_MATRIX) {
148     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
149     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
150     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
151     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
152     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
153     if (container) {
154       ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
155     } else {
156       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
157     }
158     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
159 
160     nsends    = redund->nsends;
161     nrecvs    = redund->nrecvs;
162     send_rank = redund->send_rank; recv_rank = send_rank + size;
163     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
164     sbuf_j    = redund->sbuf_j;
165     sbuf_a    = redund->sbuf_a;
166     rbuf_j    = redund->rbuf_j;
167     rbuf_a    = redund->rbuf_a;
168   }
169 
170   if (reuse == MAT_INITIAL_MATRIX){
171     PetscMPIInt  subrank,subsize;
172     PetscInt     nleftover,np_subcomm;
173     /* get the destination processors' id send_rank, nsends and nrecvs */
174     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
175     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
176     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
177     recv_rank = send_rank + size;
178     np_subcomm = size/nsubcomm;
179     nleftover  = size - nsubcomm*np_subcomm;
180     nsends = 0; nrecvs = 0;
181     for (i=0; i<size; i++){ /* i=rank*/
182       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
183         send_rank[nsends] = i; nsends++;
184         recv_rank[nrecvs++] = i;
185       }
186     }
187     if (rank >= size - nleftover){/* this proc is a leftover processor */
188       i = size-nleftover-1;
189       j = 0;
190       while (j < nsubcomm - nleftover){
191         send_rank[nsends++] = i;
192         i--; j++;
193       }
194     }
195 
196     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
197       for (i=0; i<nleftover; i++){
198         recv_rank[nrecvs++] = size-nleftover+i;
199       }
200     }
201 
202     /* allocate sbuf_j, sbuf_a */
203     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
204     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
205     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
206   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
207 
208   /* copy mat's local entries into the buffers */
209   if (reuse == MAT_INITIAL_MATRIX){
210     rownz_max = 0;
211     rptr = sbuf_j;
212     cols = sbuf_j + rend-rstart + 1;
213     vals = sbuf_a;
214     rptr[0] = 0;
215     for (i=0; i<rend-rstart; i++){
216       row = i + rstart;
217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
218       ncols  = nzA + nzB;
219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
221       /* load the column indices for this row into cols */
222       lwrite = 0;
223       for (l=0; l<nzB; l++) {
224         if ((ctmp = bmap[cworkB[l]]) < cstart){
225           vals[lwrite]   = aworkB[l];
226           cols[lwrite++] = ctmp;
227         }
228       }
229       for (l=0; l<nzA; l++){
230         vals[lwrite]   = aworkA[l];
231         cols[lwrite++] = cstart + cworkA[l];
232       }
233       for (l=0; l<nzB; l++) {
234         if ((ctmp = bmap[cworkB[l]]) >= cend){
235           vals[lwrite]   = aworkB[l];
236           cols[lwrite++] = ctmp;
237         }
238       }
239       vals += ncols;
240       cols += ncols;
241       rptr[i+1] = rptr[i] + ncols;
242       if (rownz_max < ncols) rownz_max = ncols;
243     }
244     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
245   } else { /* only copy matrix values into sbuf_a */
246     rptr = sbuf_j;
247     vals = sbuf_a;
248     rptr[0] = 0;
249     for (i=0; i<rend-rstart; i++){
250       row = i + rstart;
251       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
252       ncols  = nzA + nzB;
253       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
254       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
255       lwrite = 0;
256       for (l=0; l<nzB; l++) {
257         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
258       }
259       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
260       for (l=0; l<nzB; l++) {
261         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
262       }
263       vals += ncols;
264       rptr[i+1] = rptr[i] + ncols;
265     }
266   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
267 
268   /* send nzlocal to others, and recv other's nzlocal */
269   /*--------------------------------------------------*/
270   if (reuse == MAT_INITIAL_MATRIX){
271     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
272     s_waits2 = s_waits3 + nsends;
273     s_waits1 = s_waits2 + nsends;
274     r_waits1 = s_waits1 + nsends;
275     r_waits2 = r_waits1 + nrecvs;
276     r_waits3 = r_waits2 + nrecvs;
277   } else {
278     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
279     r_waits3 = s_waits3 + nsends;
280   }
281 
282   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
283   if (reuse == MAT_INITIAL_MATRIX){
284     /* get new tags to keep the communication clean */
285     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
286     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
287     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
288     rbuf_nz = sbuf_nz + nsends;
289 
290     /* post receives of other's nzlocal */
291     for (i=0; i<nrecvs; i++){
292       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
293     }
294     /* send nzlocal to others */
295     for (i=0; i<nsends; i++){
296       sbuf_nz[i] = nzlocal;
297       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
298     }
299     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
300     count = nrecvs;
301     while (count) {
302       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
303       recv_rank[imdex] = recv_status.MPI_SOURCE;
304       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
305       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
306 
307       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
308       rbuf_nz[imdex] += i + 2;
309       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
310       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
311       count--;
312     }
313     /* wait on sends of nzlocal */
314     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
315     /* send mat->i,j to others, and recv from other's */
316     /*------------------------------------------------*/
317     for (i=0; i<nsends; i++){
318       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
319       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
320     }
321     /* wait on receives of mat->i,j */
322     /*------------------------------*/
323     count = nrecvs;
324     while (count) {
325       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
326       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
327       count--;
328     }
329     /* wait on sends of mat->i,j */
330     /*---------------------------*/
331     if (nsends) {
332       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
333     }
334   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
335 
336   /* post receives, send and receive mat->a */
337   /*----------------------------------------*/
338   for (imdex=0; imdex<nrecvs; imdex++) {
339     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
340   }
341   for (i=0; i<nsends; i++){
342     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
343   }
344   count = nrecvs;
345   while (count) {
346     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
347     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
348     count--;
349   }
350   if (nsends) {
351     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
352   }
353 
354   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
355 
356   /* create redundant matrix */
357   /*-------------------------*/
358   if (reuse == MAT_INITIAL_MATRIX){
359     /* compute rownz_max for preallocation */
360     for (imdex=0; imdex<nrecvs; imdex++){
361       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
362       rptr = rbuf_j[imdex];
363       for (i=0; i<j; i++){
364         ncols = rptr[i+1] - rptr[i];
365         if (rownz_max < ncols) rownz_max = ncols;
366       }
367     }
368 
369     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
370     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
371     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
372     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
373     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
374   } else {
375     C = *matredundant;
376   }
377 
378   /* insert local matrix entries */
379   rptr = sbuf_j;
380   cols = sbuf_j + rend-rstart + 1;
381   vals = sbuf_a;
382   for (i=0; i<rend-rstart; i++){
383     row   = i + rstart;
384     ncols = rptr[i+1] - rptr[i];
385     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
386     vals += ncols;
387     cols += ncols;
388   }
389   /* insert received matrix entries */
390   for (imdex=0; imdex<nrecvs; imdex++){
391     rstart = rowrange[recv_rank[imdex]];
392     rend   = rowrange[recv_rank[imdex]+1];
393     rptr = rbuf_j[imdex];
394     cols = rbuf_j[imdex] + rend-rstart + 1;
395     vals = rbuf_a[imdex];
396     for (i=0; i<rend-rstart; i++){
397       row   = i + rstart;
398       ncols = rptr[i+1] - rptr[i];
399       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
400       vals += ncols;
401       cols += ncols;
402     }
403   }
404   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
405   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
406   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
407   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
408   if (reuse == MAT_INITIAL_MATRIX){
409     PetscObjectContainer container;
410     *matredundant = C;
411     /* create a supporting struct and attach it to C for reuse */
412     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
413     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
414     ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr);
415     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
416     ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr);
417 
418     redund->nzlocal = nzlocal;
419     redund->nsends  = nsends;
420     redund->nrecvs  = nrecvs;
421     redund->send_rank = send_rank;
422     redund->sbuf_nz = sbuf_nz;
423     redund->sbuf_j  = sbuf_j;
424     redund->sbuf_a  = sbuf_a;
425     redund->rbuf_j  = rbuf_j;
426     redund->rbuf_a  = rbuf_a;
427 
428     redund->MatDestroy = C->ops->destroy;
429     C->ops->destroy    = MatDestroy_MatRedundant;
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "PCSetUp_Redundant"
436 static PetscErrorCode PCSetUp_Redundant(PC pc)
437 {
438   PC_Redundant   *red  = (PC_Redundant*)pc->data;
439   PetscErrorCode ierr;
440   PetscInt       mstart,mend,mlocal,m;
441   PetscMPIInt    size;
442   MatReuse       reuse = MAT_INITIAL_MATRIX;
443   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
444   MPI_Comm       comm;
445   Vec            vec;
446 
447   PetscInt    mlocal_sub;
448   PetscMPIInt subsize,subrank;
449   PetscInt    rstart_sub,rend_sub,mloc_sub;
450 
451   PetscFunctionBegin;
452   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
453   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
454   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
455   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
456   if (!pc->setupcalled) {
457     /* create working vectors xsub/ysub and xdup/ydup */
458     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
459     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
460 
461     /* get local size of xsub/ysub */
462     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
463     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
464     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
465     if (subrank+1 < subsize){
466       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
467     } else {
468       rend_sub = m;
469     }
470     mloc_sub = rend_sub - rstart_sub;
471     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
472     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
473     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
474 
475     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
476        Note: we use communicator dupcomm, not pc->comm! */
477     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
478     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
479 
480     /* create vec scatters */
481     if (!red->scatterin){
482       IS       is1,is2;
483       PetscInt *idx1,*idx2,i,j,k;
484       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
485       idx2 = idx1 + red->nsubcomm*mlocal;
486       j = 0;
487       for (k=0; k<red->nsubcomm; k++){
488         for (i=mstart; i<mend; i++){
489           idx1[j]   = i;
490           idx2[j++] = i + m*k;
491         }
492       }
493       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
494       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
495       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
496       ierr = ISDestroy(is1);CHKERRQ(ierr);
497       ierr = ISDestroy(is2);CHKERRQ(ierr);
498 
499       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
500       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
501       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
502       ierr = ISDestroy(is1);CHKERRQ(ierr);
503       ierr = ISDestroy(is2);CHKERRQ(ierr);
504       ierr = PetscFree(idx1);CHKERRQ(ierr);
505     }
506   }
507   ierr = VecDestroy(vec);CHKERRQ(ierr);
508 
509   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
510   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
511   if (size == 1) {
512     red->useparallelmat = PETSC_FALSE;
513   }
514 
515   if (red->useparallelmat) {
516     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
517       /* destroy old matrices */
518       if (red->pmats) {
519         //ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
520         ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
521       }
522     } else if (pc->setupcalled == 1) {
523       reuse = MAT_REUSE_MATRIX;
524       str   = SAME_NONZERO_PATTERN;
525     }
526 
527     /* grab the parallel matrix and put it into processors of a subcomminicator */
528     /*--------------------------------------------------------------------------*/
529     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
530     ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr);
531     /* tell PC of the subcommunicator its operators */
532     ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr);
533   } else {
534     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
535   }
536   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
537   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "PCApply_Redundant"
543 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
544 {
545   PC_Redundant   *red = (PC_Redundant*)pc->data;
546   PetscErrorCode ierr;
547   PetscScalar    *array;
548 
549   PetscFunctionBegin;
550   /* scatter x to xdup */
551   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
552   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
553 
554   /* place xdup's local array into xsub */
555   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
556   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
557 
558   /* apply preconditioner on each processor */
559   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
560   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
561   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
562 
563   /* place ysub's local array into ydup */
564   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
565   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
566 
567   /* scatter ydup to y */
568   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
569   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
570   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
571   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "PCDestroy_Redundant"
577 static PetscErrorCode PCDestroy_Redundant(PC pc)
578 {
579   PC_Redundant   *red = (PC_Redundant*)pc->data;
580   PetscErrorCode ierr;
581 
582   PetscFunctionBegin;
583   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
584   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
585   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
586   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
587   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
588   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
589   if (red->pmats) {
590     ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
591   }
592 
593 
594   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
595   ierr = PetscFree(red);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "PCSetFromOptions_Redundant"
601 static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
602 {
603   PetscFunctionBegin;
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "PCRedundantSetScatter_Redundant"
610 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
611 {
612   PC_Redundant   *red = (PC_Redundant*)pc->data;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
616   red->scatterin  = in;
617   red->scatterout = out;
618   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
619   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 EXTERN_C_END
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "PCRedundantSetScatter"
626 /*@
627    PCRedundantSetScatter - Sets the scatter used to copy values into the
628      redundant local solve and the scatter to move them back into the global
629      vector.
630 
631    Collective on PC
632 
633    Input Parameters:
634 +  pc - the preconditioner context
635 .  in - the scatter to move the values in
636 -  out - the scatter to move them out
637 
638    Level: advanced
639 
640 .keywords: PC, redundant solve
641 @*/
642 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
643 {
644   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
645 
646   PetscFunctionBegin;
647   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
648   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
649   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
650   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
651   if (f) {
652     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
653   }
654   PetscFunctionReturn(0);
655 }
656 
657 EXTERN_C_BEGIN
658 #undef __FUNCT__
659 #define __FUNCT__ "PCRedundantGetPC_Redundant"
660 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
661 {
662   PC_Redundant *red = (PC_Redundant*)pc->data;
663 
664   PetscFunctionBegin;
665   *innerpc = red->pc;
666   PetscFunctionReturn(0);
667 }
668 EXTERN_C_END
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "PCRedundantGetPC"
672 /*@
673    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
674 
675    Not Collective
676 
677    Input Parameter:
678 .  pc - the preconditioner context
679 
680    Output Parameter:
681 .  innerpc - the sequential PC
682 
683    Level: advanced
684 
685 .keywords: PC, redundant solve
686 @*/
687 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
688 {
689   PetscErrorCode ierr,(*f)(PC,PC*);
690 
691   PetscFunctionBegin;
692   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
693   PetscValidPointer(innerpc,2);
694   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
695   if (f) {
696     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 EXTERN_C_BEGIN
702 #undef __FUNCT__
703 #define __FUNCT__ "PCRedundantGetOperators_Redundant"
704 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
705 {
706   PC_Redundant *red = (PC_Redundant*)pc->data;
707 
708   PetscFunctionBegin;
709   if (mat)  *mat  = red->pmats;
710   if (pmat) *pmat = red->pmats;
711   PetscFunctionReturn(0);
712 }
713 EXTERN_C_END
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "PCRedundantGetOperators"
717 /*@
718    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
719 
720    Not Collective
721 
722    Input Parameter:
723 .  pc - the preconditioner context
724 
725    Output Parameters:
726 +  mat - the matrix
727 -  pmat - the (possibly different) preconditioner matrix
728 
729    Level: advanced
730 
731 .keywords: PC, redundant solve
732 @*/
733 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
734 {
735   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
736 
737   PetscFunctionBegin;
738   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
739   if (mat)  PetscValidPointer(mat,2);
740   if (pmat) PetscValidPointer(pmat,3);
741   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
742   if (f) {
743     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
744   }
745   PetscFunctionReturn(0);
746 }
747 
748 /* -------------------------------------------------------------------------------------*/
749 /*MC
750      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor
751 
752 
753      Options for the redundant preconditioners can be set with -redundant_pc_xxx
754 
755    Level: intermediate
756 
757 
758 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
759            PCRedundantGetPC(), PCRedundantGetOperators()
760 
761 M*/
762 
763 EXTERN_C_BEGIN
764 #undef __FUNCT__
765 #define __FUNCT__ "PCCreate_Redundant"
766 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
767 {
768   PetscErrorCode ierr;
769   PC_Redundant   *red;
770   const char     *prefix;
771 
772   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
773   PetscMPIInt    rank,size,subrank,*subsize;
774   MPI_Comm       subcomm;
775   PetscMPIInt    duprank;
776   PetscMPIInt    rank_dup,size_dup;
777   MPI_Comm       dupcomm;
778 
779   PetscFunctionBegin;
780   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
781   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
782   red->useparallelmat   = PETSC_TRUE;
783 
784   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
785   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
786   nsubcomm = size;
787   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
788   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
789 
790   /* get size of each subcommunicators */
791   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
792   np_subcomm = size/nsubcomm;
793   nleftover  = size - nsubcomm*np_subcomm;
794   for (i=0; i<nsubcomm; i++){
795     subsize[i] = np_subcomm;
796     if (i<nleftover) subsize[i]++;
797   }
798 
799   /* find color for this proc */
800   color   = rank%nsubcomm;
801   subrank = rank/nsubcomm;
802 
803   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
804   red->subcomm  = subcomm;
805   red->color    = color;
806   red->nsubcomm = nsubcomm;
807 
808   j = 0; duprank = 0;
809   for (i=0; i<nsubcomm; i++){
810     if (j == color){
811       duprank += subrank;
812       break;
813     }
814     duprank += subsize[i]; j++;
815   }
816   /*
817   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank);
818   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
819   */
820 
821   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
822   ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr);
823   ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr);
824   /*
825   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank);
826   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
827   */
828   red->dupcomm = dupcomm;
829   ierr = PetscFree(subsize);CHKERRQ(ierr);
830 
831   /* create the sequential PC that each processor has copy of */
832   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
833   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
834   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
835   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
836   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
837 
838   pc->ops->apply             = PCApply_Redundant;
839   pc->ops->applytranspose    = 0;
840   pc->ops->setup             = PCSetUp_Redundant;
841   pc->ops->destroy           = PCDestroy_Redundant;
842   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
843   pc->ops->setuponblocks     = 0;
844   pc->ops->view              = PCView_Redundant;
845   pc->ops->applyrichardson   = 0;
846 
847   pc->data     = (void*)red;
848 
849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
850                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
852                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
854                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
855 
856   PetscFunctionReturn(0);
857 }
858 EXTERN_C_END
859