xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision a34d58eb8fd7d09a78a3844c6519d1c015444c52)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner.
5 */
6 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
7 #include "petscksp.h"
8 
9 typedef struct {
10   PC         pc;                   /* actual preconditioner used on each processor */
11   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
12   Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
13   Mat        *pmats;               /* matrix and optional preconditioner matrix */
14   Mat        pmats_sub;            /* matrix and optional preconditioner matrix belong to a subcommunicator */
15   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
16   PetscTruth useparallelmat;
17   MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
18   MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
19                                       that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
20   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
21   PetscInt   color;                /* color of processors in a subcommunicator */
22 } PC_Redundant;
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "PCView_Redundant"
26 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
27 {
28   PC_Redundant   *red = (PC_Redundant*)pc->data;
29   PetscErrorCode ierr;
30   PetscMPIInt    rank;
31   PetscTruth     iascii,isstring;
32   PetscViewer    sviewer;
33 
34   PetscFunctionBegin;
35   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
36   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
37   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
38   if (iascii) {
39     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
40     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
41     if (!rank) {
42       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
43       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
44       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
45     }
46     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
47   } else if (isstring) {
48     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
49     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
50     if (!rank) {
51       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
52     }
53     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
54   } else {
55     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
61 #include "private/vecimpl.h"
62 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
63 #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatGetRedundantMatrix"
67 PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
68 {
69   PetscMPIInt    rank,size,subrank,subsize;
70   MPI_Comm       comm=mat->comm;
71   PetscErrorCode ierr;
72   PetscInt       nsends,nrecvs,i,prid=100,itmp;
73   PetscMPIInt    *send_rank,*recv_rank;
74   PetscInt       *rowrange=mat->rmap.range,nzlocal;
75   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
76   Mat            A=aij->A,B=aij->B;
77   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
78   Mat            C=*matredundant;
79 
80   PetscInt       nleftover,np_subcomm,j;
81   PetscInt       nz_A,nz_B,*sbuf_j;
82   PetscScalar    *sbuf_a;
83   PetscInt       cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
84   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
85   PetscInt       *cols,ctmp,lwrite,*rptr,l;
86   PetscScalar    *vals,*aworkA,*aworkB;
87   PetscMPIInt    tag1,tag2,tag3,imdex;
88   MPI_Request    *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3;
89   MPI_Status     recv_status,*send_status;
90   PetscInt       *sbuf_nz,*rbuf_nz,count;
91   PetscInt       **rbuf_j;
92   PetscScalar    **rbuf_a;
93 
94   PetscFunctionBegin;
95   if (reuse == MAT_REUSE_MATRIX) {
96     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
97     if (M != N || M != mat->rmap.N) {
98       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong global size");
99     }
100     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
101     if (M != N || M != mlocal_sub) {
102       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong local size");
103     }
104   }
105 
106   ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr);
107   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
109   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
110   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
111   /*
112   ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize);
113   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
114   */
115   /* get the destination processors */
116   ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
117   recv_rank = send_rank + size;
118   np_subcomm = size/nsubcomm;
119   nleftover  = size - nsubcomm*np_subcomm;
120   nsends = 0; nrecvs = 0;
121   for (i=0; i<size; i++){ /* i=rank*/
122     if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
123       send_rank[nsends] = i; nsends++;
124       recv_rank[nrecvs++] = i;
125     }
126   }
127   if (rank >= size - nleftover){/* this proc is a leftover processor */
128     i = size-nleftover-1;
129     j = 0;
130     while (j < nsubcomm - nleftover){
131       send_rank[nsends++] = i;
132       i--; j++;
133     }
134   }
135 
136   if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
137     for (i=0; i<nleftover; i++){
138       recv_rank[nrecvs++] = size-nleftover+i;
139     }
140   }
141   if (prid == rank){
142     printf("[%d] sends to ",rank);
143     for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]);
144     printf("  \n");
145   }
146   /*
147   ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);
148   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
149   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
150   */
151 
152   /* get this processor's nzlocal=nz_A+nz_B */
153   nz_A = a->nz; nz_B = b->nz;
154   nzlocal = nz_A + nz_B;
155 
156   /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */
157   itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
158   ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
159   ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
160 
161     rptr = sbuf_j;
162     cols = sbuf_j + rend-rstart + 1;
163     vals = sbuf_a;
164     rptr[0] = 0;
165     for (i=0; i<rend-rstart; i++){
166       row = i + rstart;
167       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
168       ncols  = nzA + nzB;
169       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
170       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
171       /* load the column indices for this row into cols */
172       lwrite = 0;
173       for (l=0; l<nzB; l++) {
174         if ((ctmp = bmap[cworkB[l]]) < cstart){
175           vals[lwrite]   = aworkB[l];
176           cols[lwrite++] = ctmp;
177         }
178       }
179       for (l=0; l<nzA; l++){
180         vals[lwrite]   = aworkA[l];
181         cols[lwrite++] = cstart + cworkA[l];
182       }
183       for (l=0; l<nzB; l++) {
184         if ((ctmp = bmap[cworkB[l]]) >= cend){
185           vals[lwrite]   = aworkB[l];
186           cols[lwrite++] = ctmp;
187         }
188       }
189       /* insert local matrix values into C */
190       /* ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); */
191 
192       vals += ncols;
193       cols += ncols;
194       rptr[i+1] = rptr[i] + ncols;
195     }
196     /*
197     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
198     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
199     */
200     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
201 
202   /* send nzlocal to others, and recv other's nzlocal */
203   /*--------------------------------------------------*/
204   ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
205   rbuf_nz = sbuf_nz + nsends;
206 
207   ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
208   s_waits2 = s_waits1 + nsends;
209   s_waits3 = s_waits2 + nsends;
210   r_waits1 = s_waits3 + nsends;
211   r_waits2 = r_waits1 + nrecvs;
212   r_waits3 = r_waits2 + nrecvs;
213 
214   /* get some new tags to keep the communication clean */
215   ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr);
216   ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr);
217   ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr);
218 
219   /* post receives of other's nzlocal */
220   for (i=0; i<nrecvs; i++){
221     ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
222   }
223 
224   /* send nzlocal to others */
225   for (i=0; i<nsends; i++){
226     sbuf_nz[i] = nzlocal;
227     ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
228     if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]);
229   }
230 
231   /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
232   count = nrecvs;
233   while (count) {
234     ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
235     recv_rank[imdex] = recv_status.MPI_SOURCE;
236     /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */
237     ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
238     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
239 
240     itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
241     rbuf_nz[imdex] += itmp+2;
242     ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
243     ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
244     count--;
245   }
246 
247   /* wait on sends of nzlocal */
248   if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
249 
250   /* send mat->i,j and mat->a to others, and recv from other's */
251   /*-----------------------------------------------------------*/
252   for (i=0; i<nsends; i++){
253     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
254     itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
255     ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
256   }
257 
258   /* wait on receives of mat->i,j and mat->a */
259   /*-----------------------------------------*/
260   count = nrecvs;
261   while (count) {
262     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
263     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
264     ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
265     count--;
266   }
267 
268   /* wait on sends of mat->i,j and mat->a */
269   /*--------------------------------------*/
270   if (nsends) {
271     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
272     ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
273   }
274   ierr = PetscFree(sbuf_nz);CHKERRQ(ierr);
275   ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr);
276 
277   /* create redundant matrix */
278   /*-------------------------*/
279   if (reuse == MAT_INITIAL_MATRIX){
280     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
281     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
282     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
283   } else {
284     C = *matredundant;
285   }
286   /* insert local matrix entries */
287   rptr = sbuf_j;
288   cols = sbuf_j + rend-rstart + 1;
289   vals = sbuf_a;
290   for (i=0; i<rend-rstart; i++){
291     row   = i + rstart;
292     ncols = rptr[i+1] - rptr[i];
293     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
294     vals += ncols;
295     cols += ncols;
296   }
297   /* insert received matrix entries */
298   for (imdex=0; imdex<nrecvs; imdex++){
299 
300     rstart = rowrange[recv_rank[imdex]];
301     rend   = rowrange[recv_rank[imdex]+1];
302     rptr = rbuf_j[imdex];
303     cols = rbuf_j[imdex] + rend-rstart + 1;
304     vals = rbuf_a[imdex];
305     for (i=0; i<rend-rstart; i++){
306       row   = i + rstart;
307       ncols = rptr[i+1] - rptr[i];
308       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
309       vals += ncols;
310       cols += ncols;
311     }
312   }
313   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
314   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
315   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
316   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
317   if (reuse == MAT_INITIAL_MATRIX){
318     *matredundant = C;
319   }
320 
321   /* free space */
322   ierr = PetscFree(send_rank);CHKERRQ(ierr);
323   ierr = PetscFree(sbuf_j);CHKERRQ(ierr);
324   ierr = PetscFree(sbuf_a);CHKERRQ(ierr);
325   for (i=0; i<nrecvs; i++){
326     ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr);
327     ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr);
328   }
329   ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr);
330   PetscFunctionReturn(0);
331 }
332 
333 #undef __FUNCT__
334 #define __FUNCT__ "PCSetUp_Redundant"
335 static PetscErrorCode PCSetUp_Redundant(PC pc)
336 {
337   PC_Redundant   *red  = (PC_Redundant*)pc->data;
338   PetscErrorCode ierr;
339   PetscInt       mstart,mend,mlocal,m;
340   PetscMPIInt    size;
341   MatReuse       reuse = MAT_INITIAL_MATRIX;
342   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
343   MPI_Comm       comm;
344   Vec            vec;
345 
346   PetscInt       mlocal_sub;
347   PetscMPIInt    subsize,subrank;
348   PetscInt       rstart_sub,rend_sub,mloc_sub;
349 
350   PetscFunctionBegin;
351   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
352   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
353   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
354   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
355   if (!pc->setupcalled) {
356     /* create working vectors xsub/ysub and xdup/ydup */
357     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
358     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
359 
360     /* get local size of xsub/ysub */
361     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
362     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
363     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
364     if (subrank+1 < subsize){
365       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
366     } else {
367       rend_sub = m;
368     }
369     mloc_sub = rend_sub - rstart_sub;
370     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
371     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
372     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
373 
374     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
375        Note: we use communicator dupcomm, not pc->comm! */
376     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
377     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
378 
379     /* create vec scatters */
380     if (!red->scatterin){
381       IS       is1,is2;
382       PetscInt *idx1,*idx2,i,j,k;
383       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
384       idx2 = idx1 + red->nsubcomm*mlocal;
385       j = 0;
386       for (k=0; k<red->nsubcomm; k++){
387         for (i=mstart; i<mend; i++){
388           idx1[j]   = i;
389           idx2[j++] = i + m*k;
390         }
391       }
392       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
393       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
394       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
395       ierr = ISDestroy(is1);CHKERRQ(ierr);
396       ierr = ISDestroy(is2);CHKERRQ(ierr);
397 
398       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
399       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
400       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
401       ierr = ISDestroy(is1);CHKERRQ(ierr);
402       ierr = ISDestroy(is2);CHKERRQ(ierr);
403       ierr = PetscFree(idx1);CHKERRQ(ierr);
404     }
405   }
406   ierr = VecDestroy(vec);CHKERRQ(ierr);
407 
408   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
409   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
410   if (size == 1) {
411     red->useparallelmat = PETSC_FALSE;
412   }
413 
414   if (red->useparallelmat) {
415     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
416       /* destroy old matrices */
417       if (red->pmats) {
418         ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
419       }
420     } else if (pc->setupcalled == 1) {
421       reuse = MAT_REUSE_MATRIX;
422       str   = SAME_NONZERO_PATTERN;
423     }
424 
425     /* grab the parallel matrix and put it into processors of a subcomminicator */
426     /*--------------------------------------------------------------------------*/
427     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
428     ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats_sub);CHKERRQ(ierr);
429     /* tell PC of the subcommunicator its operators */
430     ierr = PCSetOperators(red->pc,red->pmats_sub,red->pmats_sub,str);CHKERRQ(ierr);
431   } else {
432     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
433   }
434   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
435   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
436   PetscFunctionReturn(0);
437 }
438 
439 #undef __FUNCT__
440 #define __FUNCT__ "PCApply_Redundant"
441 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
442 {
443   PC_Redundant   *red = (PC_Redundant*)pc->data;
444   PetscErrorCode ierr;
445   PetscScalar    *array;
446 
447   PetscFunctionBegin;
448   /* scatter x to xdup */
449   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
450   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
451 
452   /* place xdup's local array into xsub */
453   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
454   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
455 
456   /* apply preconditioner on each processor */
457   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
458   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
459   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
460 
461   /* place ysub's local array into ydup */
462   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
463   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
464 
465   /* scatter ydup to y */
466   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
467   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
468   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
469   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "PCDestroy_Redundant"
475 static PetscErrorCode PCDestroy_Redundant(PC pc)
476 {
477   PC_Redundant   *red = (PC_Redundant*)pc->data;
478   PetscErrorCode ierr;
479 
480   PetscFunctionBegin;
481   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
482   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
483   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
484   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
485   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
486   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
487   if (red->pmats_sub) {
488     ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr);
489   }
490 
491   if (red->pmats) {
492     ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
493   }
494   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
495   ierr = PetscFree(red);CHKERRQ(ierr);
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNCT__
500 #define __FUNCT__ "PCSetFromOptions_Redundant"
501 static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
502 {
503   PetscFunctionBegin;
504   PetscFunctionReturn(0);
505 }
506 
507 EXTERN_C_BEGIN
508 #undef __FUNCT__
509 #define __FUNCT__ "PCRedundantSetScatter_Redundant"
510 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
511 {
512   PC_Redundant   *red = (PC_Redundant*)pc->data;
513   PetscErrorCode ierr;
514 
515   PetscFunctionBegin;
516   red->scatterin  = in;
517   red->scatterout = out;
518   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
519   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 EXTERN_C_END
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "PCRedundantSetScatter"
526 /*@
527    PCRedundantSetScatter - Sets the scatter used to copy values into the
528      redundant local solve and the scatter to move them back into the global
529      vector.
530 
531    Collective on PC
532 
533    Input Parameters:
534 +  pc - the preconditioner context
535 .  in - the scatter to move the values in
536 -  out - the scatter to move them out
537 
538    Level: advanced
539 
540 .keywords: PC, redundant solve
541 @*/
542 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
543 {
544   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
545 
546   PetscFunctionBegin;
547   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
548   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
549   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
550   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
551   if (f) {
552     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
553   }
554   PetscFunctionReturn(0);
555 }
556 
557 EXTERN_C_BEGIN
558 #undef __FUNCT__
559 #define __FUNCT__ "PCRedundantGetPC_Redundant"
560 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
561 {
562   PC_Redundant *red = (PC_Redundant*)pc->data;
563 
564   PetscFunctionBegin;
565   *innerpc = red->pc;
566   PetscFunctionReturn(0);
567 }
568 EXTERN_C_END
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "PCRedundantGetPC"
572 /*@
573    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
574 
575    Not Collective
576 
577    Input Parameter:
578 .  pc - the preconditioner context
579 
580    Output Parameter:
581 .  innerpc - the sequential PC
582 
583    Level: advanced
584 
585 .keywords: PC, redundant solve
586 @*/
587 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
588 {
589   PetscErrorCode ierr,(*f)(PC,PC*);
590 
591   PetscFunctionBegin;
592   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
593   PetscValidPointer(innerpc,2);
594   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
595   if (f) {
596     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
597   }
598   PetscFunctionReturn(0);
599 }
600 
601 EXTERN_C_BEGIN
602 #undef __FUNCT__
603 #define __FUNCT__ "PCRedundantGetOperators_Redundant"
604 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
605 {
606   PC_Redundant *red = (PC_Redundant*)pc->data;
607 
608   PetscFunctionBegin;
609   if (mat)  *mat  = red->pmats[0];
610   if (pmat) *pmat = red->pmats[0];
611   PetscFunctionReturn(0);
612 }
613 EXTERN_C_END
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "PCRedundantGetOperators"
617 /*@
618    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
619 
620    Not Collective
621 
622    Input Parameter:
623 .  pc - the preconditioner context
624 
625    Output Parameters:
626 +  mat - the matrix
627 -  pmat - the (possibly different) preconditioner matrix
628 
629    Level: advanced
630 
631 .keywords: PC, redundant solve
632 @*/
633 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
634 {
635   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
636 
637   PetscFunctionBegin;
638   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
639   if (mat)  PetscValidPointer(mat,2);
640   if (pmat) PetscValidPointer(pmat,3);
641   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
642   if (f) {
643     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
644   }
645   PetscFunctionReturn(0);
646 }
647 
648 /* -------------------------------------------------------------------------------------*/
649 /*MC
650      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor
651 
652 
653      Options for the redundant preconditioners can be set with -redundant_pc_xxx
654 
655    Level: intermediate
656 
657 
658 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
659            PCRedundantGetPC(), PCRedundantGetOperators()
660 
661 M*/
662 
663 EXTERN_C_BEGIN
664 #undef __FUNCT__
665 #define __FUNCT__ "PCCreate_Redundant"
666 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
667 {
668   PetscErrorCode ierr;
669   PC_Redundant   *red;
670   const char     *prefix;
671 
672   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
673   PetscMPIInt    rank,size,subrank,*subsize;
674   MPI_Comm       subcomm;
675   PetscMPIInt    duprank;
676   PetscMPIInt    rank_dup,size_dup;
677   MPI_Comm       dupcomm;
678 
679   PetscFunctionBegin;
680   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
681   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
682   red->useparallelmat   = PETSC_TRUE;
683 
684   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
685   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
686   nsubcomm = size;
687   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
688   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
689 
690   /* get size of each subcommunicators */
691   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
692   np_subcomm = size/nsubcomm;
693   nleftover  = size - nsubcomm*np_subcomm;
694   for (i=0; i<nsubcomm; i++){
695     subsize[i] = np_subcomm;
696     if (i<nleftover) subsize[i]++;
697   }
698 
699   /* find color for this proc */
700   color   = rank%nsubcomm;
701   subrank = rank/nsubcomm;
702 
703   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
704   red->subcomm  = subcomm;
705   red->color    = color;
706   red->nsubcomm = nsubcomm;
707 
708   j = 0; duprank = 0;
709   for (i=0; i<nsubcomm; i++){
710     if (j == color){
711       duprank += subrank;
712       break;
713     }
714     duprank += subsize[i]; j++;
715   }
716   /*
717   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank);
718   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
719   */
720 
721   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
722   ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr);
723   ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr);
724   /*
725   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank);
726   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
727   */
728   red->dupcomm = dupcomm;
729   ierr = PetscFree(subsize);CHKERRQ(ierr);
730 
731   /* create the sequential PC that each processor has copy of */
732   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
733   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
734   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
735   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
736   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
737 
738   pc->ops->apply             = PCApply_Redundant;
739   pc->ops->applytranspose    = 0;
740   pc->ops->setup             = PCSetUp_Redundant;
741   pc->ops->destroy           = PCDestroy_Redundant;
742   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
743   pc->ops->setuponblocks     = 0;
744   pc->ops->view              = PCView_Redundant;
745   pc->ops->applyrichardson   = 0;
746 
747   pc->data     = (void*)red;
748 
749   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
750                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
751   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
752                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
753   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
754                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
755 
756   PetscFunctionReturn(0);
757 }
758 EXTERN_C_END
759