xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision 0ae51fcdcedb33c1b8546b85eed5e90d983fd463)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner.
5 */
6 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
7 #include "petscksp.h"
8 
9 typedef struct {
10   PC         pc;                   /* actual preconditioner used on each processor */
11   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
12   Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
13   Mat        *pmats;               /* matrix and optional preconditioner matrix */
14   Mat        pmats_sub;            /* matrix and optional preconditioner matrix belong to a subcommunicator */
15   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
16   PetscTruth useparallelmat;
17   MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
18   MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
19                                       that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
20   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
21   PetscInt   color;                /* color of processors in a subcommunicator */
22 } PC_Redundant;
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "PCView_Redundant"
26 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
27 {
28   PC_Redundant   *red = (PC_Redundant*)pc->data;
29   PetscErrorCode ierr;
30   PetscMPIInt    rank;
31   PetscTruth     iascii,isstring;
32   PetscViewer    sviewer;
33 
34   PetscFunctionBegin;
35   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
36   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
37   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
38   if (iascii) {
39     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
40     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
41     if (!rank) {
42       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
43       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
44       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
45     }
46     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
47   } else if (isstring) {
48     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
49     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
50     if (!rank) {
51       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
52     }
53     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
54   } else {
55     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
61 #include "private/vecimpl.h"
62 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
63 #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatGetRedundantMatrix"
67 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
68 {
69   PetscMPIInt    rank,size,subrank,subsize;
70   MPI_Comm       comm=mat->comm;
71   PetscErrorCode ierr;
72   PetscInt       nsends,nrecvs,i,prid=100,itmp;
73   PetscMPIInt    *send_rank,*recv_rank;
74   PetscInt       *rowrange=mat->rmap.range,nzlocal;
75   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
76   Mat            A=aij->A,B=aij->B;
77   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
78   Mat            C=*matredundant;
79 
80   PetscInt       nleftover,np_subcomm,j;
81   PetscInt       nz_A,nz_B,*sbuf_j;
82   PetscScalar    *sbuf_a;
83   PetscInt       cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
84   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
85   PetscInt       *cols,ctmp,lwrite,*rptr,l;
86   PetscScalar    *vals,*aworkA,*aworkB;
87   PetscMPIInt    tag1,tag2,tag3,imdex;
88   MPI_Request    *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3;
89   MPI_Status     recv_status,*send_status;
90   PetscInt       *sbuf_nz,*rbuf_nz,count;
91   PetscInt       **rbuf_j;
92   PetscScalar    **rbuf_a;
93 
94   PetscFunctionBegin;
95   if (reuse == MAT_REUSE_MATRIX) {
96     PetscTruth flag;
97     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
98     if (M != N || M != mat->rmap.N) {
99       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong global size");
100     }
101     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
102     if (M != N || M != mlocal_sub) {
103       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong local size");
104     }
105   }
106 
107   ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr);
108   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
110   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
111   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
112   /*
113   ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize);
114   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
115   */
116   /* get the destination processors */
117   ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
118   recv_rank = send_rank + size;
119   np_subcomm = size/nsubcomm;
120   nleftover  = size - nsubcomm*np_subcomm;
121   nsends = 0; nrecvs = 0;
122   for (i=0; i<size; i++){ /* i=rank*/
123     if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
124       send_rank[nsends] = i; nsends++;
125       recv_rank[nrecvs++] = i;
126     }
127   }
128   if (rank >= size - nleftover){/* this proc is a leftover processor */
129     i = size-nleftover-1;
130     j = 0;
131     while (j < nsubcomm - nleftover){
132       send_rank[nsends++] = i;
133       i--; j++;
134     }
135   }
136 
137   if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
138     for (i=0; i<nleftover; i++){
139       recv_rank[nrecvs++] = size-nleftover+i;
140     }
141   }
142   if (prid == rank){
143     printf("[%d] sends to ",rank);
144     for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]);
145     printf("  \n");
146   }
147   /*
148   ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);
149   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
150   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
151   */
152 
153   /* get this processor's nzlocal=nz_A+nz_B */
154   nz_A = a->nz; nz_B = b->nz;
155   nzlocal = nz_A + nz_B;
156 
157   /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */
158   itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
159   ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
160   ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
161 
162     rptr = sbuf_j;
163     cols = sbuf_j + rend-rstart + 1;
164     vals = sbuf_a;
165     rptr[0] = 0;
166     for (i=0; i<rend-rstart; i++){
167       row = i + rstart;
168       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
169       ncols  = nzA + nzB;
170       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
171       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
172       /* load the column indices for this row into cols */
173       lwrite = 0;
174       for (l=0; l<nzB; l++) {
175         if ((ctmp = bmap[cworkB[l]]) < cstart){
176           vals[lwrite]   = aworkB[l];
177           cols[lwrite++] = ctmp;
178         }
179       }
180       for (l=0; l<nzA; l++){
181         vals[lwrite]   = aworkA[l];
182         cols[lwrite++] = cstart + cworkA[l];
183       }
184       for (l=0; l<nzB; l++) {
185         if ((ctmp = bmap[cworkB[l]]) >= cend){
186           vals[lwrite]   = aworkB[l];
187           cols[lwrite++] = ctmp;
188         }
189       }
190       /* insert local matrix values into C */
191       /* ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); */
192 
193       vals += ncols;
194       cols += ncols;
195       rptr[i+1] = rptr[i] + ncols;
196     }
197     /*
198     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
199     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
200     */
201     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
202 
203   /* send nzlocal to others, and recv other's nzlocal */
204   /*--------------------------------------------------*/
205   ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
206   rbuf_nz = sbuf_nz + nsends;
207 
208   ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
209   s_waits2 = s_waits1 + nsends;
210   s_waits3 = s_waits2 + nsends;
211   r_waits1 = s_waits3 + nsends;
212   r_waits2 = r_waits1 + nrecvs;
213   r_waits3 = r_waits2 + nrecvs;
214 
215   /* get some new tags to keep the communication clean */
216   ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr);
217   ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr);
218   ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr);
219 
220   /* post receives of other's nzlocal */
221   for (i=0; i<nrecvs; i++){
222     ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
223   }
224 
225   /* send nzlocal to others */
226   for (i=0; i<nsends; i++){
227     sbuf_nz[i] = nzlocal;
228     ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
229     if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]);
230   }
231 
232   /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
233   count = nrecvs;
234   while (count) {
235     ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
236     recv_rank[imdex] = recv_status.MPI_SOURCE;
237     /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */
238     ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
239     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
240 
241     itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
242     rbuf_nz[imdex] += itmp+2;
243     ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
244     ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
245     count--;
246   }
247 
248   /* wait on sends of nzlocal */
249   if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
250 
251   /* send mat->i,j and mat->a to others, and recv from other's */
252   /*-----------------------------------------------------------*/
253   for (i=0; i<nsends; i++){
254     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
255     itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
256     ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
257   }
258 
259   /* wait on receives of mat->i,j and mat->a */
260   /*-----------------------------------------*/
261   count = nrecvs;
262   while (count) {
263     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
264     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
265     ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
266     count--;
267   }
268 
269   /* wait on sends of mat->i,j and mat->a */
270   /*--------------------------------------*/
271   if (nsends) {
272     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
273     ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
274   }
275   ierr = PetscFree(sbuf_nz);CHKERRQ(ierr);
276   ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr);
277 
278   /* create redundant matrix */
279   /*-------------------------*/
280   if (reuse == MAT_INITIAL_MATRIX){
281     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
282     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
283     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
284   } else {
285     C = *matredundant;
286   }
287   /* insert local matrix entries */
288   rptr = sbuf_j;
289   cols = sbuf_j + rend-rstart + 1;
290   vals = sbuf_a;
291   for (i=0; i<rend-rstart; i++){
292     row   = i + rstart;
293     ncols = rptr[i+1] - rptr[i];
294     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
295     vals += ncols;
296     cols += ncols;
297   }
298   /* insert received matrix entries */
299   for (imdex=0; imdex<nrecvs; imdex++){
300 
301     rstart = rowrange[recv_rank[imdex]];
302     rend   = rowrange[recv_rank[imdex]+1];
303     rptr = rbuf_j[imdex];
304     cols = rbuf_j[imdex] + rend-rstart + 1;
305     vals = rbuf_a[imdex];
306     for (i=0; i<rend-rstart; i++){
307       row   = i + rstart;
308       ncols = rptr[i+1] - rptr[i];
309       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
310       vals += ncols;
311       cols += ncols;
312     }
313   }
314   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
316   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
317   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
318   if (reuse == MAT_INITIAL_MATRIX){
319     *matredundant = C;
320   }
321 
322   /* free space */
323   ierr = PetscFree(send_rank);CHKERRQ(ierr);
324   ierr = PetscFree(sbuf_j);CHKERRQ(ierr);
325   ierr = PetscFree(sbuf_a);CHKERRQ(ierr);
326   for (i=0; i<nrecvs; i++){
327     ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr);
328     ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr);
329   }
330   ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr);
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "PCSetUp_Redundant"
336 static PetscErrorCode PCSetUp_Redundant(PC pc)
337 {
338   PC_Redundant   *red  = (PC_Redundant*)pc->data;
339   PetscErrorCode ierr;
340   PetscInt       mstart,mend,mlocal,m;
341   PetscMPIInt    size;
342   IS             isl;
343   MatReuse       reuse = MAT_INITIAL_MATRIX;
344   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
345   MPI_Comm       comm;
346   Vec            vec;
347 
348   PetscInt    mlocal_sub;
349   PetscMPIInt subsize,subrank;
350   PetscInt    rstart_sub,rend_sub,mloc_sub;
351 
352   PetscFunctionBegin;
353   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
354   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
355   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
356   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
357   if (!pc->setupcalled) {
358     /* create working vectors xsub/ysub and xdup/ydup */
359     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
360     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
361 
362     /* get local size of xsub/ysub */
363     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
364     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
365     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
366     if (subrank+1 < subsize){
367       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
368     } else {
369       rend_sub = m;
370     }
371     mloc_sub = rend_sub - rstart_sub;
372     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
373     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
374     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
375 
376     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
377        Note: we use communicator dupcomm, not pc->comm! */
378     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
379     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
380 
381     /* create vec scatters */
382     if (!red->scatterin){
383       IS       is1,is2;
384       PetscInt *idx1,*idx2,i,j,k;
385       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
386       idx2 = idx1 + red->nsubcomm*mlocal;
387       j = 0;
388       for (k=0; k<red->nsubcomm; k++){
389         for (i=mstart; i<mend; i++){
390           idx1[j]   = i;
391           idx2[j++] = i + m*k;
392         }
393       }
394       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
395       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
396       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
397       ierr = ISDestroy(is1);CHKERRQ(ierr);
398       ierr = ISDestroy(is2);CHKERRQ(ierr);
399 
400       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
401       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
402       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
403       ierr = ISDestroy(is1);CHKERRQ(ierr);
404       ierr = ISDestroy(is2);CHKERRQ(ierr);
405       ierr = PetscFree(idx1);CHKERRQ(ierr);
406     }
407   }
408   ierr = VecDestroy(vec);CHKERRQ(ierr);
409 
410   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
411   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
412   if (size == 1) {
413     red->useparallelmat = PETSC_FALSE;
414   }
415 
416   if (red->useparallelmat) {
417     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
418       /* destroy old matrices */
419       if (red->pmats) {
420         ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
421       }
422     } else if (pc->setupcalled == 1) {
423       reuse = MAT_REUSE_MATRIX;
424       str   = SAME_NONZERO_PATTERN;
425     }
426 
427     /* grab the parallel matrix and put it into processors of a subcomminicator */
428     /*--------------------------------------------------------------------------*/
429     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
430     ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats_sub);CHKERRQ(ierr);
431     /* tell PC of the subcommunicator its operators */
432     ierr = PCSetOperators(red->pc,red->pmats_sub,red->pmats_sub,str);CHKERRQ(ierr);
433   } else {
434     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
435   }
436   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
437   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "PCApply_Redundant"
443 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
444 {
445   PC_Redundant   *red = (PC_Redundant*)pc->data;
446   PetscErrorCode ierr;
447   PetscScalar    *array;
448 
449   PetscFunctionBegin;
450   /* scatter x to xdup */
451   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
452   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
453 
454   /* place xdup's local array into xsub */
455   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
456   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
457 
458   /* apply preconditioner on each processor */
459   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
460   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
461   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
462 
463   /* place ysub's local array into ydup */
464   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
465   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
466 
467   /* scatter ydup to y */
468   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
469   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
470   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
471   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "PCDestroy_Redundant"
477 static PetscErrorCode PCDestroy_Redundant(PC pc)
478 {
479   PC_Redundant   *red = (PC_Redundant*)pc->data;
480   PetscErrorCode ierr;
481 
482   PetscFunctionBegin;
483   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
484   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
485   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
486   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
487   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
488   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
489   if (red->pmats_sub) {
490     ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr);
491   }
492 
493   if (red->pmats) {
494     ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
495   }
496   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
497   ierr = PetscFree(red);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNCT__
502 #define __FUNCT__ "PCSetFromOptions_Redundant"
503 static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
504 {
505   PetscFunctionBegin;
506   PetscFunctionReturn(0);
507 }
508 
509 EXTERN_C_BEGIN
510 #undef __FUNCT__
511 #define __FUNCT__ "PCRedundantSetScatter_Redundant"
512 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
513 {
514   PC_Redundant   *red = (PC_Redundant*)pc->data;
515   PetscErrorCode ierr;
516 
517   PetscFunctionBegin;
518   red->scatterin  = in;
519   red->scatterout = out;
520   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
521   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
522   PetscFunctionReturn(0);
523 }
524 EXTERN_C_END
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "PCRedundantSetScatter"
528 /*@
529    PCRedundantSetScatter - Sets the scatter used to copy values into the
530      redundant local solve and the scatter to move them back into the global
531      vector.
532 
533    Collective on PC
534 
535    Input Parameters:
536 +  pc - the preconditioner context
537 .  in - the scatter to move the values in
538 -  out - the scatter to move them out
539 
540    Level: advanced
541 
542 .keywords: PC, redundant solve
543 @*/
544 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
545 {
546   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
547 
548   PetscFunctionBegin;
549   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
550   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
551   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
552   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
553   if (f) {
554     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
555   }
556   PetscFunctionReturn(0);
557 }
558 
559 EXTERN_C_BEGIN
560 #undef __FUNCT__
561 #define __FUNCT__ "PCRedundantGetPC_Redundant"
562 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
563 {
564   PC_Redundant *red = (PC_Redundant*)pc->data;
565 
566   PetscFunctionBegin;
567   *innerpc = red->pc;
568   PetscFunctionReturn(0);
569 }
570 EXTERN_C_END
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "PCRedundantGetPC"
574 /*@
575    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
576 
577    Not Collective
578 
579    Input Parameter:
580 .  pc - the preconditioner context
581 
582    Output Parameter:
583 .  innerpc - the sequential PC
584 
585    Level: advanced
586 
587 .keywords: PC, redundant solve
588 @*/
589 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
590 {
591   PetscErrorCode ierr,(*f)(PC,PC*);
592 
593   PetscFunctionBegin;
594   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
595   PetscValidPointer(innerpc,2);
596   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
597   if (f) {
598     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 EXTERN_C_BEGIN
604 #undef __FUNCT__
605 #define __FUNCT__ "PCRedundantGetOperators_Redundant"
606 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
607 {
608   PC_Redundant *red = (PC_Redundant*)pc->data;
609 
610   PetscFunctionBegin;
611   if (mat)  *mat  = red->pmats[0];
612   if (pmat) *pmat = red->pmats[0];
613   PetscFunctionReturn(0);
614 }
615 EXTERN_C_END
616 
617 #undef __FUNCT__
618 #define __FUNCT__ "PCRedundantGetOperators"
619 /*@
620    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
621 
622    Not Collective
623 
624    Input Parameter:
625 .  pc - the preconditioner context
626 
627    Output Parameters:
628 +  mat - the matrix
629 -  pmat - the (possibly different) preconditioner matrix
630 
631    Level: advanced
632 
633 .keywords: PC, redundant solve
634 @*/
635 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
636 {
637   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
638 
639   PetscFunctionBegin;
640   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
641   if (mat)  PetscValidPointer(mat,2);
642   if (pmat) PetscValidPointer(pmat,3);
643   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
644   if (f) {
645     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 /* -------------------------------------------------------------------------------------*/
651 /*MC
652      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor
653 
654 
655      Options for the redundant preconditioners can be set with -redundant_pc_xxx
656 
657    Level: intermediate
658 
659 
660 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
661            PCRedundantGetPC(), PCRedundantGetOperators()
662 
663 M*/
664 
665 EXTERN_C_BEGIN
666 #undef __FUNCT__
667 #define __FUNCT__ "PCCreate_Redundant"
668 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
669 {
670   PetscErrorCode ierr;
671   PC_Redundant   *red;
672   const char     *prefix;
673 
674   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
675   PetscMPIInt    rank,size,subrank,*subsize;
676   MPI_Comm       subcomm;
677   PetscMPIInt    duprank;
678   PetscMPIInt    rank_dup,size_dup;
679   MPI_Comm       dupcomm;
680 
681   PetscFunctionBegin;
682   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
683   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
684   red->useparallelmat   = PETSC_TRUE;
685 
686   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
687   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
688   nsubcomm = size;
689   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
690   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
691 
692   /* get size of each subcommunicators */
693   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
694   np_subcomm = size/nsubcomm;
695   nleftover  = size - nsubcomm*np_subcomm;
696   for (i=0; i<nsubcomm; i++){
697     subsize[i] = np_subcomm;
698     if (i<nleftover) subsize[i]++;
699   }
700 
701   /* find color for this proc */
702   color   = rank%nsubcomm;
703   subrank = rank/nsubcomm;
704 
705   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
706   red->subcomm  = subcomm;
707   red->color    = color;
708   red->nsubcomm = nsubcomm;
709 
710   j = 0; duprank = 0;
711   for (i=0; i<nsubcomm; i++){
712     if (j == color){
713       duprank += subrank;
714       break;
715     }
716     duprank += subsize[i]; j++;
717   }
718   /*
719   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank);
720   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
721   */
722 
723   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
724   ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr);
725   ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr);
726   /*
727   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank);
728   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
729   */
730   red->dupcomm = dupcomm;
731   ierr = PetscFree(subsize);CHKERRQ(ierr);
732 
733   /* create the sequential PC that each processor has copy of */
734   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
735   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
736   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
737   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
738   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
739 
740   pc->ops->apply             = PCApply_Redundant;
741   pc->ops->applytranspose    = 0;
742   pc->ops->setup             = PCSetUp_Redundant;
743   pc->ops->destroy           = PCDestroy_Redundant;
744   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
745   pc->ops->setuponblocks     = 0;
746   pc->ops->view              = PCView_Redundant;
747   pc->ops->applyrichardson   = 0;
748 
749   pc->data     = (void*)red;
750 
751   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
752                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
753   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
754                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
755   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
756                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
757 
758   PetscFunctionReturn(0);
759 }
760 EXTERN_C_END
761