xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision 43fb2f9719b756666b8cbfe3c1e1d5dd5c3e4aaf)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner.
5 */
6 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
7 #include "petscksp.h"
8 
9 typedef struct {
10   PC         pc;                   /* actual preconditioner used on each processor */
11   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
12   Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
13   Mat        *pmats;               /* matrix and optional preconditioner matrix */
14   Mat        pmats_sub;            /* matrix and optional preconditioner matrix belong to a subcommunicator */
15   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
16   PetscTruth useparallelmat;
17   MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
18   MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
19                                       that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
20   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
21   PetscInt   color;                /* color of processors in a subcommunicator */
22 } PC_Redundant;
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "PCView_Redundant"
26 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
27 {
28   PC_Redundant   *red = (PC_Redundant*)pc->data;
29   PetscErrorCode ierr;
30   PetscMPIInt    rank;
31   PetscTruth     iascii,isstring;
32   PetscViewer    sviewer;
33 
34   PetscFunctionBegin;
35   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
36   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
37   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
38   if (iascii) {
39     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
40     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
41     if (!rank) {
42       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
43       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
44       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
45     }
46     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
47   } else if (isstring) {
48     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
49     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
50     if (!rank) {
51       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
52     }
53     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
54   } else {
55     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
61 #include "private/vecimpl.h"
62 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
63 #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatGetRedundantMatrix"
67 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
68 {
69   PetscMPIInt    rank,size,subrank,subsize;
70   MPI_Comm       comm=mat->comm;
71   PetscErrorCode ierr;
72   PetscInt       nsends,nrecvs,i,prid=100,itmp;
73   PetscMPIInt    *send_rank,*recv_rank;
74   PetscInt       *rowrange=mat->rmap.range,nzlocal;
75   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
76   Mat            A=aij->A,B=aij->B;
77   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
78   Mat            C;
79 
80   PetscInt       nleftover,np_subcomm,j;
81   PetscInt       nz_A,nz_B,*sbuf_j;
82   PetscScalar    *sbuf_a;
83   PetscInt       cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
84   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
85   PetscInt       *cols,ctmp,lwrite,*rptr,l;
86   PetscScalar    *vals,*aworkA,*aworkB;
87   PetscMPIInt    tag1,tag2,tag3,imdex;
88   MPI_Request    *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3;
89   MPI_Status     recv_status,*send_status;
90   PetscInt       *sbuf_nz,*rbuf_nz,count;
91   PetscInt       **rbuf_j;
92   PetscScalar    **rbuf_a;
93 
94   PetscFunctionBegin;
95   ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr);
96   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
97   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
98   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
99   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
100   /*
101   ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize);
102   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
103   */
104   /* get the destination processors */
105   ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
106   recv_rank = send_rank + size;
107   np_subcomm = size/nsubcomm;
108   nleftover  = size - nsubcomm*np_subcomm;
109   nsends = 0; nrecvs = 0;
110   for (i=0; i<size; i++){ /* i=rank*/
111     if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
112       send_rank[nsends] = i; nsends++;
113       recv_rank[nrecvs++] = i;
114     }
115   }
116   if (rank >= size - nleftover){/* this proc is a leftover processor */
117     i = size-nleftover-1;
118     j = 0;
119     while (j < nsubcomm - nleftover){
120       send_rank[nsends++] = i;
121       i--; j++;
122     }
123   }
124 
125   if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
126     for (i=0; i<nleftover; i++){
127       recv_rank[nrecvs++] = size-nleftover+i;
128     }
129   }
130   if (prid == rank){
131     printf("[%d] sends to ",rank);
132     for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]);
133     printf("  \n");
134   }
135   /*
136   ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);
137   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
138   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
139   */
140 
141   /* get this processor's nzlocal=nz_A+nz_B */
142   nz_A = a->nz; nz_B = b->nz;
143   nzlocal = nz_A + nz_B;
144 
145   /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */
146   itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
147   ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
148   ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
149 
150     rptr = sbuf_j;
151     cols = sbuf_j + rend-rstart + 1;
152     vals = sbuf_a;
153     rptr[0] = 0;
154     for (i=0; i<rend-rstart; i++){
155       row = i + rstart;
156       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
157       ncols  = nzA + nzB;
158       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
159       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
160       /* load the column indices for this row into cols */
161       lwrite = 0;
162       for (l=0; l<nzB; l++) {
163         if ((ctmp = bmap[cworkB[l]]) < cstart){
164           vals[lwrite]   = aworkB[l];
165           cols[lwrite++] = ctmp;
166         }
167       }
168       for (l=0; l<nzA; l++){
169         vals[lwrite]   = aworkA[l];
170         cols[lwrite++] = cstart + cworkA[l];
171       }
172       for (l=0; l<nzB; l++) {
173         if ((ctmp = bmap[cworkB[l]]) >= cend){
174           vals[lwrite]   = aworkB[l];
175           cols[lwrite++] = ctmp;
176         }
177       }
178       /* insert local matrix values into C */
179       /* ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); */
180 
181       vals += ncols;
182       cols += ncols;
183       rptr[i+1] = rptr[i] + ncols;
184     }
185     /*
186     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
187     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
188     */
189     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
190 
191   /* send nzlocal to others, and recv other's nzlocal */
192   /*--------------------------------------------------*/
193   ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
194   rbuf_nz = sbuf_nz + nsends;
195 
196   ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
197   s_waits2 = s_waits1 + nsends;
198   s_waits3 = s_waits2 + nsends;
199   r_waits1 = s_waits3 + nsends;
200   r_waits2 = r_waits1 + nrecvs;
201   r_waits3 = r_waits2 + nrecvs;
202 
203   /* get some new tags to keep the communication clean */
204   ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr);
205   ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr);
206   ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr);
207 
208   /* post receives of other's nzlocal */
209   for (i=0; i<nrecvs; i++){
210     ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
211   }
212 
213   /* send nzlocal to others */
214   for (i=0; i<nsends; i++){
215     sbuf_nz[i] = nzlocal;
216     ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
217     if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]);
218   }
219 
220   /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
221   count = nrecvs;
222   while (count) {
223     ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
224     recv_rank[imdex] = recv_status.MPI_SOURCE;
225     /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */
226     ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
227     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
228 
229     itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
230     rbuf_nz[imdex] += itmp+2;
231     ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
232     ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
233     count--;
234   }
235 
236   /* wait on sends of nzlocal */
237   if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
238 
239   /* send mat->i,j and mat->a to others, and recv from other's */
240   /*-----------------------------------------------------------*/
241   for (i=0; i<nsends; i++){
242     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
243     itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
244     ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
245   }
246 
247   /* wait on receives of mat->i,j and mat->a */
248   /*-----------------------------------------*/
249   count = nrecvs;
250   while (count) {
251     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
252     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
253     ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
254     count--;
255   }
256 
257   /* wait on sends of mat->i,j and mat->a */
258   /*--------------------------------------*/
259   if (nsends) {
260     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
261     ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
262   }
263   ierr = PetscFree(sbuf_nz);CHKERRQ(ierr);
264   ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr);
265 
266   /* create redundant matrix */
267   /*-------------------------*/
268   ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
269   ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
270   ierr = MatSetFromOptions(C);CHKERRQ(ierr);
271 
272   /* insert local matrix entries */
273   rptr = sbuf_j;
274   cols = sbuf_j + rend-rstart + 1;
275   vals = sbuf_a;
276   for (i=0; i<rend-rstart; i++){
277     row   = i + rstart;
278     ncols = rptr[i+1] - rptr[i];
279     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
280     vals += ncols;
281     cols += ncols;
282   }
283   /* insert received matrix entries */
284   for (imdex=0; imdex<nrecvs; imdex++){
285 
286     rstart = rowrange[recv_rank[imdex]];
287     rend   = rowrange[recv_rank[imdex]+1];
288     rptr = rbuf_j[imdex];
289     cols = rbuf_j[imdex] + rend-rstart + 1;
290     vals = rbuf_a[imdex];
291     for (i=0; i<rend-rstart; i++){
292       row   = i + rstart;
293       ncols = rptr[i+1] - rptr[i];
294       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
295       vals += ncols;
296       cols += ncols;
297     }
298   }
299   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
300   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
301   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
302   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
303   *matredundant = C;
304 
305   /* free space */
306   ierr = PetscFree(send_rank);CHKERRQ(ierr);
307   ierr = PetscFree(sbuf_j);CHKERRQ(ierr);
308   ierr = PetscFree(sbuf_a);CHKERRQ(ierr);
309   for (i=0; i<nrecvs; i++){
310     ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr);
311     ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr);
312   }
313   ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr);
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "PCSetUp_Redundant"
319 static PetscErrorCode PCSetUp_Redundant(PC pc)
320 {
321   PC_Redundant   *red  = (PC_Redundant*)pc->data;
322   PetscErrorCode ierr;
323   PetscInt       mstart,mend,mlocal,m;
324   PetscMPIInt    size;
325   IS             isl;
326   MatReuse       reuse = MAT_INITIAL_MATRIX;
327   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
328   MPI_Comm       comm;
329   Vec            vec;
330 
331   PetscInt    mlocal_sub;
332   PetscMPIInt subsize,subrank;
333   PetscInt    rstart_sub,rend_sub,mloc_sub;
334   Mat         Aredundant;
335 
336   PetscFunctionBegin;
337   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
338   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
339   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
340   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
341   if (!pc->setupcalled) {
342     /* create working vectors xsub/ysub and xdup/ydup */
343     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
344     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
345 
346     /* get local size of xsub/ysub */
347     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
348     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
349     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
350     if (subrank+1 < subsize){
351       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
352     } else {
353       rend_sub = m;
354     }
355     mloc_sub = rend_sub - rstart_sub;
356     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
357     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
358     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
359 
360     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
361        Note: we use communicator dupcomm, not pc->comm! */
362     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
363     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
364 
365     /* create vec scatters */
366     if (!red->scatterin){
367       IS       is1,is2;
368       PetscInt *idx1,*idx2,i,j,k;
369       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
370       idx2 = idx1 + red->nsubcomm*mlocal;
371       j = 0;
372       for (k=0; k<red->nsubcomm; k++){
373         for (i=mstart; i<mend; i++){
374           idx1[j]   = i;
375           idx2[j++] = i + m*k;
376         }
377       }
378       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
379       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
380       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
381       ierr = ISDestroy(is1);CHKERRQ(ierr);
382       ierr = ISDestroy(is2);CHKERRQ(ierr);
383 
384       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
385       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
386       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
387       ierr = ISDestroy(is1);CHKERRQ(ierr);
388       ierr = ISDestroy(is2);CHKERRQ(ierr);
389       ierr = PetscFree(idx1);CHKERRQ(ierr);
390     }
391   }
392   ierr = VecDestroy(vec);CHKERRQ(ierr);
393 
394   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
395   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
396   if (size == 1) {
397     red->useparallelmat = PETSC_FALSE;
398   }
399 
400   if (red->useparallelmat) {
401     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
402       /* destroy old matrices */
403       if (red->pmats) {
404         ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
405       }
406     } else if (pc->setupcalled == 1) {
407       reuse = MAT_REUSE_MATRIX;
408       str   = SAME_NONZERO_PATTERN;
409     }
410 
411     /* grab the parallel matrix and put it into processors of a subcomminicator */
412     /*--------------------------------------------------------------------------*/
413     if (red->nsubcomm == size){
414       /* create sequential matrices in each processor */
415       ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
416       ierr = MatGetSubMatrices(pc->pmat,1,&isl,&isl,reuse,&red->pmats);CHKERRQ(ierr);
417       ierr = ISDestroy(isl);CHKERRQ(ierr);
418       ierr = PCSetOperators(red->pc,red->pmats[0],red->pmats[0],str);CHKERRQ(ierr);
419     } else {
420       /* create mpi matrices in each subcommunicator */
421       ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
422       ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&Aredundant);CHKERRQ(ierr);
423       red->pmats_sub = Aredundant;
424       /* tell PC of the subcommunicator its operators */
425       ierr = PCSetOperators(red->pc,Aredundant,Aredundant,str);CHKERRQ(ierr);
426     }
427   } else {
428     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
429   }
430   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
431   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 #undef __FUNCT__
436 #define __FUNCT__ "PCApply_Redundant"
437 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
438 {
439   PC_Redundant   *red = (PC_Redundant*)pc->data;
440   PetscErrorCode ierr;
441   PetscScalar    *array;
442 
443   PetscFunctionBegin;
444   /* scatter x to xdup */
445   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
446   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
447 
448   /* place xdup's local array into xsub */
449   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
450   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
451 
452   /* apply preconditioner on each processor */
453   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
454   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
455   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
456 
457   /* place ysub's local array into ydup */
458   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
459   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
460 
461   /* scatter ydup to y */
462   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
463   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
464   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
465   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "PCDestroy_Redundant"
471 static PetscErrorCode PCDestroy_Redundant(PC pc)
472 {
473   PC_Redundant   *red = (PC_Redundant*)pc->data;
474   PetscErrorCode ierr;
475 
476   PetscFunctionBegin;
477   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
478   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
479   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
480   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
481   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
482   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
483   if (red->pmats_sub) {
484     ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr);
485   }
486 
487   if (red->pmats) {
488     ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
489   }
490   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
491   ierr = PetscFree(red);CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "PCSetFromOptions_Redundant"
497 static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
498 {
499   PetscFunctionBegin;
500   PetscFunctionReturn(0);
501 }
502 
503 EXTERN_C_BEGIN
504 #undef __FUNCT__
505 #define __FUNCT__ "PCRedundantSetScatter_Redundant"
506 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
507 {
508   PC_Redundant   *red = (PC_Redundant*)pc->data;
509   PetscErrorCode ierr;
510 
511   PetscFunctionBegin;
512   red->scatterin  = in;
513   red->scatterout = out;
514   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
515   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 EXTERN_C_END
519 
520 #undef __FUNCT__
521 #define __FUNCT__ "PCRedundantSetScatter"
522 /*@
523    PCRedundantSetScatter - Sets the scatter used to copy values into the
524      redundant local solve and the scatter to move them back into the global
525      vector.
526 
527    Collective on PC
528 
529    Input Parameters:
530 +  pc - the preconditioner context
531 .  in - the scatter to move the values in
532 -  out - the scatter to move them out
533 
534    Level: advanced
535 
536 .keywords: PC, redundant solve
537 @*/
538 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
539 {
540   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
541 
542   PetscFunctionBegin;
543   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
544   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
545   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
546   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
547   if (f) {
548     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
549   }
550   PetscFunctionReturn(0);
551 }
552 
553 EXTERN_C_BEGIN
554 #undef __FUNCT__
555 #define __FUNCT__ "PCRedundantGetPC_Redundant"
556 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
557 {
558   PC_Redundant *red = (PC_Redundant*)pc->data;
559 
560   PetscFunctionBegin;
561   *innerpc = red->pc;
562   PetscFunctionReturn(0);
563 }
564 EXTERN_C_END
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "PCRedundantGetPC"
568 /*@
569    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
570 
571    Not Collective
572 
573    Input Parameter:
574 .  pc - the preconditioner context
575 
576    Output Parameter:
577 .  innerpc - the sequential PC
578 
579    Level: advanced
580 
581 .keywords: PC, redundant solve
582 @*/
583 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
584 {
585   PetscErrorCode ierr,(*f)(PC,PC*);
586 
587   PetscFunctionBegin;
588   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
589   PetscValidPointer(innerpc,2);
590   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
591   if (f) {
592     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
593   }
594   PetscFunctionReturn(0);
595 }
596 
597 EXTERN_C_BEGIN
598 #undef __FUNCT__
599 #define __FUNCT__ "PCRedundantGetOperators_Redundant"
600 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
601 {
602   PC_Redundant *red = (PC_Redundant*)pc->data;
603 
604   PetscFunctionBegin;
605   if (mat)  *mat  = red->pmats[0];
606   if (pmat) *pmat = red->pmats[0];
607   PetscFunctionReturn(0);
608 }
609 EXTERN_C_END
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "PCRedundantGetOperators"
613 /*@
614    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
615 
616    Not Collective
617 
618    Input Parameter:
619 .  pc - the preconditioner context
620 
621    Output Parameters:
622 +  mat - the matrix
623 -  pmat - the (possibly different) preconditioner matrix
624 
625    Level: advanced
626 
627 .keywords: PC, redundant solve
628 @*/
629 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
630 {
631   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
632 
633   PetscFunctionBegin;
634   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
635   if (mat)  PetscValidPointer(mat,2);
636   if (pmat) PetscValidPointer(pmat,3);
637   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
638   if (f) {
639     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
640   }
641   PetscFunctionReturn(0);
642 }
643 
644 /* -------------------------------------------------------------------------------------*/
645 /*MC
646      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor
647 
648 
649      Options for the redundant preconditioners can be set with -redundant_pc_xxx
650 
651    Level: intermediate
652 
653 
654 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
655            PCRedundantGetPC(), PCRedundantGetOperators()
656 
657 M*/
658 
659 EXTERN_C_BEGIN
660 #undef __FUNCT__
661 #define __FUNCT__ "PCCreate_Redundant"
662 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
663 {
664   PetscErrorCode ierr;
665   PC_Redundant   *red;
666   const char     *prefix;
667 
668   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
669   PetscMPIInt    rank,size,subrank,*subsize;
670   MPI_Comm       subcomm;
671   PetscMPIInt    duprank;
672   PetscMPIInt    rank_dup,size_dup;
673   MPI_Comm       dupcomm;
674 
675   PetscFunctionBegin;
676   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
677   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
678   red->useparallelmat   = PETSC_TRUE;
679 
680   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
681   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
682   nsubcomm = size;
683   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
684   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
685 
686   /* get size of each subcommunicators */
687   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
688   np_subcomm = size/nsubcomm;
689   nleftover  = size - nsubcomm*np_subcomm;
690   for (i=0; i<nsubcomm; i++){
691     subsize[i] = np_subcomm;
692     if (i<nleftover) subsize[i]++;
693   }
694 
695   /* find color for this proc */
696   color   = rank%nsubcomm;
697   subrank = rank/nsubcomm;
698 
699   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
700   red->subcomm  = subcomm;
701   red->color    = color;
702   red->nsubcomm = nsubcomm;
703 
704   j = 0; duprank = 0;
705   for (i=0; i<nsubcomm; i++){
706     if (j == color){
707       duprank += subrank;
708       break;
709     }
710     duprank += subsize[i]; j++;
711   }
712   /*
713   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank);
714   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
715   */
716 
717   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
718   ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr);
719   ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr);
720   /*
721   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank);
722   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
723   */
724   red->dupcomm = dupcomm;
725   ierr = PetscFree(subsize);CHKERRQ(ierr);
726 
727   /* create the sequential PC that each processor has copy of */
728   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
729   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
730   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
731   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
732   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
733 
734   pc->ops->apply             = PCApply_Redundant;
735   pc->ops->applytranspose    = 0;
736   pc->ops->setup             = PCSetUp_Redundant;
737   pc->ops->destroy           = PCDestroy_Redundant;
738   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
739   pc->ops->setuponblocks     = 0;
740   pc->ops->view              = PCView_Redundant;
741   pc->ops->applyrichardson   = 0;
742 
743   pc->data     = (void*)red;
744 
745   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
746                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
747   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
748                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
749   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
750                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
751 
752   PetscFunctionReturn(0);
753 }
754 EXTERN_C_END
755