xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision ca6161988a13f5f1c12dfd3b7a0b9d56a75fe920)
1 #define PETSCKSP_DLL
2 
3 /*
4   This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner.
5 */
6 #include "private/pcimpl.h"     /*I "petscpc.h" I*/
7 #include "petscksp.h"
8 
9 typedef struct {
10   PC         pc;                   /* actual preconditioner used on each processor */
11   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
12   Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
13   Mat        *pmats;               /* matrix and optional preconditioner matrix */
14   Mat        pmats_sub;            /* matrix and optional preconditioner matrix */
15   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
16   PetscTruth useparallelmat;
17   MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
18   MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
19                                       that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
20   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
21   PetscInt   color;                /* color of processors in a subcommunicator */
22 } PC_Redundant;
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "PCView_Redundant"
26 static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
27 {
28   PC_Redundant   *red = (PC_Redundant*)pc->data;
29   PetscErrorCode ierr;
30   PetscMPIInt    rank;
31   PetscTruth     iascii,isstring;
32   PetscViewer    sviewer;
33 
34   PetscFunctionBegin;
35   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
36   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
37   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
38   if (iascii) {
39     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
40     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
41     if (!rank) {
42       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
43       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
44       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
45     }
46     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
47   } else if (isstring) {
48     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
49     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
50     if (!rank) {
51       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
52     }
53     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
54   } else {
55     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
61 #include "private/vecimpl.h"
62 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
63 #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatGetRedundantMatrix"
67 PetscErrorCode MatGetRedundantMatrix_AIJ(PC pc,Mat mat,MPI_Comm subcomm,PetscInt mlocal_sub,Mat *matredundant)
68 {
69   PetscMPIInt    rank,size,subrank,subsize;
70   MPI_Comm       comm=mat->comm;
71   PetscErrorCode ierr;
72   PC_Redundant   *red=(PC_Redundant*)pc->data;
73   PetscInt       nsubcomm=red->nsubcomm,nsends,nrecvs,i,prid=100,itmp;
74   PetscMPIInt    *send_rank,*recv_rank;
75   PetscInt       *rowrange=pc->pmat->rmap.range,nzlocal;
76   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
77   Mat            A=aij->A,B=aij->B;
78   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
79   Mat            C;
80 
81   PetscInt       nleftover,np_subcomm,j;
82   PetscInt       nz_A,nz_B,*sbuf_j;
83   PetscScalar    *sbuf_a;
84   PetscInt       cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
85   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
86   PetscInt       *cols,ctmp,lwrite,*rptr,l;
87   PetscScalar    *vals,*aworkA,*aworkB;
88 
89   PetscMPIInt    tag1,tag2,tag3,imdex;
90   MPI_Request    *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3;
91   MPI_Status     recv_status,*send_status;
92   PetscInt       *sbuf_nz,*rbuf_nz,count;
93 
94   PetscInt     **rbuf_j;
95   PetscScalar  **rbuf_a;
96 
97   PetscFunctionBegin;
98   ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr);
99   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
101   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
102   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
103   /*
104   ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize);
105   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
106   */
107   /* get the destination processors */
108   ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
109   recv_rank = send_rank + size;
110   np_subcomm = size/nsubcomm;
111   nleftover  = size - nsubcomm*np_subcomm;
112   nsends = 0; nrecvs = 0;
113   for (i=0; i<size; i++){ /* i=rank*/
114     if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
115       send_rank[nsends] = i; nsends++;
116       recv_rank[nrecvs++] = i;
117     }
118   }
119   if (rank >= size - nleftover){/* this proc is a leftover processor */
120     i = size-nleftover-1;
121     j = 0;
122     while (j < nsubcomm - nleftover){
123       send_rank[nsends++] = i;
124       i--; j++;
125     }
126   }
127 
128   if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
129     for (i=0; i<nleftover; i++){
130       recv_rank[nrecvs++] = size-nleftover+i;
131     }
132   }
133   if (prid == rank){
134     printf("[%d] sends to ",rank);
135     for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]);
136     printf("  \n");
137   }
138   /*
139   ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);
140   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
141   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
142   */
143 
144   /* get this processor's nzlocal=nz_A+nz_B */
145   nz_A = a->nz; nz_B = b->nz;
146   nzlocal = nz_A + nz_B;
147 
148   /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */
149   itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
150   ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
151   ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
152 
153     rptr = sbuf_j;
154     cols = sbuf_j + rend-rstart + 1;
155     vals = sbuf_a;
156     rptr[0] = 0;
157     for (i=0; i<rend-rstart; i++){
158       row = i + rstart;
159       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
160       ncols  = nzA + nzB;
161       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
162       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
163       /* load the column indices for this row into cols */
164       lwrite = 0;
165       for (l=0; l<nzB; l++) {
166         if ((ctmp = bmap[cworkB[l]]) < cstart){
167           vals[lwrite]   = aworkB[l];
168           cols[lwrite++] = ctmp;
169         }
170       }
171       for (l=0; l<nzA; l++){
172         vals[lwrite]   = aworkA[l];
173         cols[lwrite++] = cstart + cworkA[l];
174       }
175       for (l=0; l<nzB; l++) {
176         if ((ctmp = bmap[cworkB[l]]) >= cend){
177           vals[lwrite]   = aworkB[l];
178           cols[lwrite++] = ctmp;
179         }
180       }
181       /* insert local matrix values into C */
182       /* ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); */
183 
184       vals += ncols;
185       cols += ncols;
186       rptr[i+1] = rptr[i] + ncols;
187     }
188     /*
189     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
190     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
191     */
192     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
193 
194   /* send nzlocal to others, and recv other's nzlocal */
195   /*--------------------------------------------------*/
196   ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
197   rbuf_nz = sbuf_nz + nsends;
198 
199   ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
200   s_waits2 = s_waits1 + nsends;
201   s_waits3 = s_waits2 + nsends;
202   r_waits1 = s_waits3 + nsends;
203   r_waits2 = r_waits1 + nrecvs;
204   r_waits3 = r_waits2 + nrecvs;
205 
206   /* get some new tags to keep the communication clean */
207   ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr);
208   ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr);
209   ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr);
210 
211   /* post receives of other's nzlocal */
212   for (i=0; i<nrecvs; i++){
213     ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
214   }
215 
216   /* send nzlocal to others */
217   for (i=0; i<nsends; i++){
218     sbuf_nz[i] = nzlocal;
219     ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
220     if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]);
221   }
222 
223   /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
224   count = nrecvs;
225   while (count) {
226     ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
227     recv_rank[imdex] = recv_status.MPI_SOURCE;
228     /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */
229     ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
230     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
231 
232     itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
233     rbuf_nz[imdex] += itmp+2;
234     ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
235     ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
236     count--;
237   }
238 
239   /* wait on sends of nzlocal */
240   if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
241 
242   /* send mat->i,j and mat->a to others, and recv from other's */
243   /*-----------------------------------------------------------*/
244   for (i=0; i<nsends; i++){
245     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
246     itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
247     ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
248   }
249 
250   /* wait on receives of mat->i,j and mat->a */
251   /*-----------------------------------------*/
252   count = nrecvs;
253   while (count) {
254     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
255     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
256     ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
257     count--;
258   }
259 
260   /* wait on sends of mat->i,j and mat->a */
261   /*--------------------------------------*/
262   if (nsends) {
263     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
264     ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
265   }
266   ierr = PetscFree(sbuf_nz);CHKERRQ(ierr);
267   ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr);
268 
269   /* create redundant matrix */
270   /*-------------------------*/
271   ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
272   ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
273   ierr = MatSetFromOptions(C);CHKERRQ(ierr);
274 
275   /* insert local matrix entries */
276   rptr = sbuf_j;
277   cols = sbuf_j + rend-rstart + 1;
278   vals = sbuf_a;
279   for (i=0; i<rend-rstart; i++){
280     row   = i + rstart;
281     ncols = rptr[i+1] - rptr[i];
282     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
283     vals += ncols;
284     cols += ncols;
285   }
286   /* insert received matrix entries */
287   for (imdex=0; imdex<nrecvs; imdex++){
288 
289     rstart = rowrange[recv_rank[imdex]];
290     rend   = rowrange[recv_rank[imdex]+1];
291     rptr = rbuf_j[imdex];
292     cols = rbuf_j[imdex] + rend-rstart + 1;
293     vals = rbuf_a[imdex];
294     for (i=0; i<rend-rstart; i++){
295       row   = i + rstart;
296       ncols = rptr[i+1] - rptr[i];
297       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
298       vals += ncols;
299       cols += ncols;
300     }
301   }
302   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
303   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
304   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
305   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
306   *matredundant = C;
307 
308   /* free space */
309   ierr = PetscFree(send_rank);CHKERRQ(ierr);
310   ierr = PetscFree(sbuf_j);CHKERRQ(ierr);
311   ierr = PetscFree(sbuf_a);CHKERRQ(ierr);
312   for (i=0; i<nrecvs; i++){
313     ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr);
314     ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr);
315   }
316   ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 #undef __FUNCT__
321 #define __FUNCT__ "PCSetUp_Redundant"
322 static PetscErrorCode PCSetUp_Redundant(PC pc)
323 {
324   PC_Redundant   *red  = (PC_Redundant*)pc->data;
325   PetscErrorCode ierr;
326   PetscInt       mstart,mend,mlocal,m;
327   PetscMPIInt    size;
328   IS             isl;
329   MatReuse       reuse = MAT_INITIAL_MATRIX;
330   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
331   MPI_Comm       comm;
332   Vec            vec;
333 
334   PetscInt    mlocal_sub;
335   PetscMPIInt subsize,subrank;
336   PetscInt    rstart_sub,rend_sub,mloc_sub;
337   Mat         Aredundant;
338 
339   PetscFunctionBegin;
340   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
341   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
342   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
343   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
344   if (!pc->setupcalled) {
345     /* create working vectors xsub/ysub and xdup/ydup */
346     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
347     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
348 
349     /* get local size of xsub/ysub */
350     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
351     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
352     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
353     if (subrank+1 < subsize){
354       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
355     } else {
356       rend_sub = m;
357     }
358     mloc_sub = rend_sub - rstart_sub;
359     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
360     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
361     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
362 
363     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
364        Note: we use communicator dupcomm, not pc->comm! */
365     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
366     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
367 
368     /* create vec scatters */
369     if (!red->scatterin){
370       IS       is1,is2;
371       PetscInt *idx1,*idx2,i,j,k;
372       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
373       idx2 = idx1 + red->nsubcomm*mlocal;
374       j = 0;
375       for (k=0; k<red->nsubcomm; k++){
376         for (i=mstart; i<mend; i++){
377           idx1[j]   = i;
378           idx2[j++] = i + m*k;
379         }
380       }
381       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
382       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
383       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
384       ierr = ISDestroy(is1);CHKERRQ(ierr);
385       ierr = ISDestroy(is2);CHKERRQ(ierr);
386 
387       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
388       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
389       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
390       ierr = ISDestroy(is1);CHKERRQ(ierr);
391       ierr = ISDestroy(is2);CHKERRQ(ierr);
392       ierr = PetscFree(idx1);CHKERRQ(ierr);
393     }
394   }
395   ierr = VecDestroy(vec);CHKERRQ(ierr);
396 
397   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
398   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
399   if (size == 1) {
400     red->useparallelmat = PETSC_FALSE;
401   }
402 
403   if (red->useparallelmat) {
404     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
405       /* destroy old matrices */
406       if (red->pmats) {
407         ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
408       }
409     } else if (pc->setupcalled == 1) {
410       reuse = MAT_REUSE_MATRIX;
411       str   = SAME_NONZERO_PATTERN;
412     }
413 
414     /* ================== matrix ============= */
415     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
416     ierr = MatGetRedundantMatrix_AIJ(pc,pc->pmat,red->subcomm,mlocal_sub,&Aredundant);CHKERRQ(ierr);
417 
418     /* grab the parallel matrix and put it into processors of a subcomminicator */
419     ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
420     ierr = MatGetSubMatrices(pc->pmat,1,&isl,&isl,reuse,&red->pmats);CHKERRQ(ierr);
421     ierr = ISDestroy(isl);CHKERRQ(ierr);
422 
423     /* ------- temporarily set a mpi matrix pmats_sub- provided by user! --*/
424     ierr = MatCreate(red->subcomm,&red->pmats_sub);CHKERRQ(ierr);
425     ierr = MatSetSizes(red->pmats_sub,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
426     ierr = MatSetFromOptions(red->pmats_sub);CHKERRQ(ierr);
427     {
428       PetscInt          Istart,Iend,ncols,i;
429       const PetscInt    *cols;
430       const PetscScalar *vals;
431       PetscTruth flg;
432       ierr = MatGetOwnershipRange(red->pmats_sub,&Istart,&Iend);CHKERRQ(ierr);
433       for (i=Istart; i<Iend; i++) {
434         ierr = MatGetRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr);
435         ierr = MatSetValues(red->pmats_sub,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
436         ierr = MatRestoreRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr);
437       }
438       ierr = MatAssemblyBegin(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
439       ierr = MatAssemblyEnd(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
440 
441       ierr = MatEqual(red->pmats_sub,Aredundant,&flg);CHKERRQ(ierr);
442       if (!flg) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"pmats_sub !=Aredundant ");
443       ierr = MatDestroy(red->pmats_sub);
444     }
445     red->pmats_sub = Aredundant;
446 
447     /* tell PC of the subcommunicator its operators */
448     ierr = PCSetOperators(red->pc,red->pmats_sub,red->pmats_sub,str);CHKERRQ(ierr);
449   } else {
450     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
451   }
452   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
453   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "PCApply_Redundant"
459 static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
460 {
461   PC_Redundant   *red = (PC_Redundant*)pc->data;
462   PetscErrorCode ierr;
463   PetscScalar    *array;
464 
465   PetscFunctionBegin;
466   /* scatter x to xdup */
467   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
468   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
469 
470   /* place xdup's local array into xsub */
471   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
472   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
473 
474   /* apply preconditioner on each processor */
475   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
476   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
477   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
478 
479   /* place ysub's local array into ydup */
480   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
481   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
482 
483   /* scatter ydup to y */
484   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
485   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
486   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
487   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "PCDestroy_Redundant"
493 static PetscErrorCode PCDestroy_Redundant(PC pc)
494 {
495   PC_Redundant   *red = (PC_Redundant*)pc->data;
496   PetscErrorCode ierr;
497 
498   PetscFunctionBegin;
499   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
500   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
501   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
502   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
503   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
504   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
505   if (red->pmats_sub) {
506     ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr);
507   }
508 
509   if (red->pmats) {
510     ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
511   }
512   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
513   ierr = PetscFree(red);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "PCSetFromOptions_Redundant"
519 static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
520 {
521   PetscFunctionBegin;
522   PetscFunctionReturn(0);
523 }
524 
525 EXTERN_C_BEGIN
526 #undef __FUNCT__
527 #define __FUNCT__ "PCRedundantSetScatter_Redundant"
528 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
529 {
530   PC_Redundant   *red = (PC_Redundant*)pc->data;
531   PetscErrorCode ierr;
532 
533   PetscFunctionBegin;
534   red->scatterin  = in;
535   red->scatterout = out;
536   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
537   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
538   PetscFunctionReturn(0);
539 }
540 EXTERN_C_END
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "PCRedundantSetScatter"
544 /*@
545    PCRedundantSetScatter - Sets the scatter used to copy values into the
546      redundant local solve and the scatter to move them back into the global
547      vector.
548 
549    Collective on PC
550 
551    Input Parameters:
552 +  pc - the preconditioner context
553 .  in - the scatter to move the values in
554 -  out - the scatter to move them out
555 
556    Level: advanced
557 
558 .keywords: PC, redundant solve
559 @*/
560 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
561 {
562   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
563 
564   PetscFunctionBegin;
565   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
566   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
567   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
568   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
569   if (f) {
570     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
571   }
572   PetscFunctionReturn(0);
573 }
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "PCRedundantGetPC_Redundant"
578 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
579 {
580   PC_Redundant *red = (PC_Redundant*)pc->data;
581 
582   PetscFunctionBegin;
583   *innerpc = red->pc;
584   PetscFunctionReturn(0);
585 }
586 EXTERN_C_END
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "PCRedundantGetPC"
590 /*@
591    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
592 
593    Not Collective
594 
595    Input Parameter:
596 .  pc - the preconditioner context
597 
598    Output Parameter:
599 .  innerpc - the sequential PC
600 
601    Level: advanced
602 
603 .keywords: PC, redundant solve
604 @*/
605 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
606 {
607   PetscErrorCode ierr,(*f)(PC,PC*);
608 
609   PetscFunctionBegin;
610   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
611   PetscValidPointer(innerpc,2);
612   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
613   if (f) {
614     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 EXTERN_C_BEGIN
620 #undef __FUNCT__
621 #define __FUNCT__ "PCRedundantGetOperators_Redundant"
622 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
623 {
624   PC_Redundant *red = (PC_Redundant*)pc->data;
625 
626   PetscFunctionBegin;
627   if (mat)  *mat  = red->pmats[0];
628   if (pmat) *pmat = red->pmats[0];
629   PetscFunctionReturn(0);
630 }
631 EXTERN_C_END
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "PCRedundantGetOperators"
635 /*@
636    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
637 
638    Not Collective
639 
640    Input Parameter:
641 .  pc - the preconditioner context
642 
643    Output Parameters:
644 +  mat - the matrix
645 -  pmat - the (possibly different) preconditioner matrix
646 
647    Level: advanced
648 
649 .keywords: PC, redundant solve
650 @*/
651 PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
652 {
653   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
654 
655   PetscFunctionBegin;
656   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
657   if (mat)  PetscValidPointer(mat,2);
658   if (pmat) PetscValidPointer(pmat,3);
659   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
660   if (f) {
661     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 /* -------------------------------------------------------------------------------------*/
667 /*MC
668      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor
669 
670 
671      Options for the redundant preconditioners can be set with -redundant_pc_xxx
672 
673    Level: intermediate
674 
675 
676 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
677            PCRedundantGetPC(), PCRedundantGetOperators()
678 
679 M*/
680 
681 EXTERN_C_BEGIN
682 #undef __FUNCT__
683 #define __FUNCT__ "PCCreate_Redundant"
684 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
685 {
686   PetscErrorCode ierr;
687   PC_Redundant   *red;
688   const char     *prefix;
689 
690   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
691   PetscMPIInt    rank,size,subrank,*subsize;
692   MPI_Comm       subcomm;
693   PetscMPIInt    duprank;
694   PetscMPIInt    rank_dup,size_dup;
695   MPI_Comm       dupcomm;
696 
697   PetscFunctionBegin;
698   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
699   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
700   red->useparallelmat   = PETSC_TRUE;
701 
702   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
703   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
704   nsubcomm = size;
705   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
706   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
707 
708   /* get size of each subcommunicators */
709   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
710   np_subcomm = size/nsubcomm;
711   nleftover  = size - nsubcomm*np_subcomm;
712   for (i=0; i<nsubcomm; i++){
713     subsize[i] = np_subcomm;
714     if (i<nleftover) subsize[i]++;
715   }
716 
717   /* find color for this proc */
718   color   = rank%nsubcomm;
719   subrank = rank/nsubcomm;
720 
721   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
722   red->subcomm  = subcomm;
723   red->color    = color;
724   red->nsubcomm = nsubcomm;
725 
726   j = 0; duprank = 0;
727   for (i=0; i<nsubcomm; i++){
728     if (j == color){
729       duprank += subrank;
730       break;
731     }
732     duprank += subsize[i]; j++;
733   }
734   /*
735   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank);
736   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
737   */
738 
739   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
740   ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr);
741   ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr);
742   /*
743   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank);
744   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
745   */
746   red->dupcomm = dupcomm;
747   ierr = PetscFree(subsize);CHKERRQ(ierr);
748 
749   /* create the sequential PC that each processor has copy of */
750   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
751   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
752   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
753   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
754   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
755 
756   pc->ops->apply             = PCApply_Redundant;
757   pc->ops->applytranspose    = 0;
758   pc->ops->setup             = PCSetUp_Redundant;
759   pc->ops->destroy           = PCDestroy_Redundant;
760   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
761   pc->ops->setuponblocks     = 0;
762   pc->ops->view              = PCView_Redundant;
763   pc->ops->applyrichardson   = 0;
764 
765   pc->data     = (void*)red;
766 
767   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
768                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
770                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
772                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
773 
774   PetscFunctionReturn(0);
775 }
776 EXTERN_C_END
777