xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision 3f457be154c925b43e77babd097a01a246d09d34)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
2dba47a55SKris Buschelman 
34b9ad928SBarry Smith /*
4*3f457be1SHong Zhang   This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner.
54b9ad928SBarry Smith */
66356e834SBarry Smith #include "private/pcimpl.h"     /*I "petscpc.h" I*/
74b9ad928SBarry Smith #include "petscksp.h"
84b9ad928SBarry Smith 
9*3f457be1SHong Zhang #undef CONTIGUOUS_COLOR
10*3f457be1SHong Zhang #define INTER_COLOR
11*3f457be1SHong Zhang 
124b9ad928SBarry Smith typedef struct {
134b9ad928SBarry Smith   PC         pc;                   /* actual preconditioner used on each processor */
14*3f457be1SHong Zhang   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
15*3f457be1SHong Zhang   Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
164b9ad928SBarry Smith   Mat        *pmats;               /* matrix and optional preconditioner matrix */
17*3f457be1SHong Zhang   Mat        pmats_sub;            /* matrix and optional preconditioner matrix */
18*3f457be1SHong Zhang   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
194b9ad928SBarry Smith   PetscTruth useparallelmat;
20*3f457be1SHong Zhang   MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
21*3f457be1SHong Zhang   MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
22*3f457be1SHong Zhang                                       that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
23*3f457be1SHong Zhang   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
24*3f457be1SHong Zhang   PetscInt   color;                /* color of processors in a subcommunicator */
254b9ad928SBarry Smith } PC_Redundant;
264b9ad928SBarry Smith 
274b9ad928SBarry Smith #undef __FUNCT__
284b9ad928SBarry Smith #define __FUNCT__ "PCView_Redundant"
296849ba73SBarry Smith static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
304b9ad928SBarry Smith {
314b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
32dfbe8321SBarry Smith   PetscErrorCode ierr;
3313f74950SBarry Smith   PetscMPIInt    rank;
3432077d6dSBarry Smith   PetscTruth     iascii,isstring;
354b9ad928SBarry Smith   PetscViewer    sviewer;
364b9ad928SBarry Smith 
374b9ad928SBarry Smith   PetscFunctionBegin;
384b9ad928SBarry Smith   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
3932077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
404b9ad928SBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
4132077d6dSBarry Smith   if (iascii) {
424b9ad928SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
434b9ad928SBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
444b9ad928SBarry Smith     if (!rank) {
454b9ad928SBarry Smith       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
464b9ad928SBarry Smith       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
474b9ad928SBarry Smith       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
484b9ad928SBarry Smith     }
494b9ad928SBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
504b9ad928SBarry Smith   } else if (isstring) {
514b9ad928SBarry Smith     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
524b9ad928SBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
534b9ad928SBarry Smith     if (!rank) {
544b9ad928SBarry Smith       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
554b9ad928SBarry Smith     }
564b9ad928SBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
574b9ad928SBarry Smith   } else {
5879a5c55eSBarry Smith     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
594b9ad928SBarry Smith   }
604b9ad928SBarry Smith   PetscFunctionReturn(0);
614b9ad928SBarry Smith }
624b9ad928SBarry Smith 
63*3f457be1SHong Zhang #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
64*3f457be1SHong Zhang #include "private/vecimpl.h"
65*3f457be1SHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
66*3f457be1SHong Zhang #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
67*3f457be1SHong Zhang 
68*3f457be1SHong Zhang #undef __FUNCT__
69*3f457be1SHong Zhang #define __FUNCT__ "MatGetRedundantMatrix"
70*3f457be1SHong Zhang PetscErrorCode MatGetRedundantMatrix_AIJ(PC pc,Mat mat,MPI_Comm subcomm,PetscInt mlocal_sub,Mat *matredundant)
71*3f457be1SHong Zhang {
72*3f457be1SHong Zhang   PetscMPIInt    rank,size,subrank,subsize;
73*3f457be1SHong Zhang   MPI_Comm       comm=mat->comm;
74*3f457be1SHong Zhang   PetscErrorCode ierr;
75*3f457be1SHong Zhang   PC_Redundant   *red=(PC_Redundant*)pc->data;
76*3f457be1SHong Zhang   PetscInt       nsubcomm=red->nsubcomm,nsends,nrecvs,i,prid=100,itmp;
77*3f457be1SHong Zhang   PetscMPIInt    *send_rank,*recv_rank;
78*3f457be1SHong Zhang   PetscInt       *rowrange=pc->pmat->rmap.range,mlocal_max,nzlocal;
79*3f457be1SHong Zhang   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
80*3f457be1SHong Zhang   Mat            A=aij->A,B=aij->B;
81*3f457be1SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
82*3f457be1SHong Zhang   Mat            C;
83*3f457be1SHong Zhang 
84*3f457be1SHong Zhang   PetscInt       nleftover,np_subcomm,j;
85*3f457be1SHong Zhang   PetscInt       nz_A,nz_B,*sbuf_j;
86*3f457be1SHong Zhang   PetscScalar    *sbuf_a;
87*3f457be1SHong Zhang   PetscInt       cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
88*3f457be1SHong Zhang   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
89*3f457be1SHong Zhang   PetscInt       *cols,ctmp,lwrite,*rptr,l;
90*3f457be1SHong Zhang   PetscScalar    *vals,*aworkA,*aworkB;
91*3f457be1SHong Zhang 
92*3f457be1SHong Zhang   PetscMPIInt    tag1,tag2,tag3,imdex;
93*3f457be1SHong Zhang   MPI_Request    *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3;
94*3f457be1SHong Zhang   MPI_Status     recv_status,*send_status;
95*3f457be1SHong Zhang   PetscInt       *sbuf_nz,*rbuf_nz,count;
96*3f457be1SHong Zhang 
97*3f457be1SHong Zhang   PetscInt     **rbuf_j;
98*3f457be1SHong Zhang   PetscScalar  **rbuf_a;
99*3f457be1SHong Zhang 
100*3f457be1SHong Zhang   PetscFunctionBegin;
101*3f457be1SHong Zhang   ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr);
102*3f457be1SHong Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
103*3f457be1SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
104*3f457be1SHong Zhang   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
105*3f457be1SHong Zhang   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
106*3f457be1SHong Zhang   /*
107*3f457be1SHong Zhang   ierr = PetscSynchronizedPrintf(comm, "[%d] subrank %d, subsize %d\n",rank,subrank,subsize);
108*3f457be1SHong Zhang   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
109*3f457be1SHong Zhang   */
110*3f457be1SHong Zhang   /* get the destination processors */
111*3f457be1SHong Zhang   ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
112*3f457be1SHong Zhang   recv_rank = send_rank + size;
113*3f457be1SHong Zhang   np_subcomm = size/nsubcomm;
114*3f457be1SHong Zhang   nleftover  = size - nsubcomm*np_subcomm;
115*3f457be1SHong Zhang   nsends = 0; nrecvs = 0;
116*3f457be1SHong Zhang   for (i=0; i<size; i++){ /* i=rank*/
117*3f457be1SHong Zhang     if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
118*3f457be1SHong Zhang       send_rank[nsends] = i; nsends++;
119*3f457be1SHong Zhang       recv_rank[nrecvs++] = i;
120*3f457be1SHong Zhang     }
121*3f457be1SHong Zhang   }
122*3f457be1SHong Zhang   if (rank >= size - nleftover){/* this proc is a leftover processor */
123*3f457be1SHong Zhang     i = size-nleftover-1;
124*3f457be1SHong Zhang     j = 0;
125*3f457be1SHong Zhang     while (j < nsubcomm - nleftover){
126*3f457be1SHong Zhang       send_rank[nsends++] = i;
127*3f457be1SHong Zhang       i--; j++;
128*3f457be1SHong Zhang     }
129*3f457be1SHong Zhang   }
130*3f457be1SHong Zhang 
131*3f457be1SHong Zhang   if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
132*3f457be1SHong Zhang     for (i=0; i<nleftover; i++){
133*3f457be1SHong Zhang       recv_rank[nrecvs++] = size-nleftover+i;
134*3f457be1SHong Zhang     }
135*3f457be1SHong Zhang   }
136*3f457be1SHong Zhang   if (prid == rank){
137*3f457be1SHong Zhang     printf("[%d] sends to ",rank);
138*3f457be1SHong Zhang     for (i=0; i<nsends; i++) printf(" [%d],",send_rank[i]);
139*3f457be1SHong Zhang     printf("  \n");
140*3f457be1SHong Zhang   }
141*3f457be1SHong Zhang   /*
142*3f457be1SHong Zhang   ierr = PetscSynchronizedPrintf(comm, "[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);
143*3f457be1SHong Zhang   ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr);
144*3f457be1SHong Zhang   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
145*3f457be1SHong Zhang   */
146*3f457be1SHong Zhang 
147*3f457be1SHong Zhang   /* get this processor's nzlocal=nz_A+nz_B */
148*3f457be1SHong Zhang   nz_A = a->nz; nz_B = b->nz;
149*3f457be1SHong Zhang   nzlocal = nz_A + nz_B;
150*3f457be1SHong Zhang 
151*3f457be1SHong Zhang   /* allocate sbuf_j, sbuf_a, then copy mat's local entries into the buffers */
152*3f457be1SHong Zhang   itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
153*3f457be1SHong Zhang   ierr = PetscMalloc(itmp*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
154*3f457be1SHong Zhang   ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
155*3f457be1SHong Zhang 
156*3f457be1SHong Zhang     rptr = sbuf_j;
157*3f457be1SHong Zhang     cols = sbuf_j + rend-rstart + 1;
158*3f457be1SHong Zhang     vals = sbuf_a;
159*3f457be1SHong Zhang     rptr[0] = 0;
160*3f457be1SHong Zhang     for (i=0; i<rend-rstart; i++){
161*3f457be1SHong Zhang       row = i + rstart;
162*3f457be1SHong Zhang       if (rank == prid) printf(" \n row %d: ",row);
163*3f457be1SHong Zhang       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
164*3f457be1SHong Zhang       ncols  = nzA + nzB;
165*3f457be1SHong Zhang       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
166*3f457be1SHong Zhang       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
167*3f457be1SHong Zhang       /* load the column indices for this row into cols */
168*3f457be1SHong Zhang       lwrite = 0;
169*3f457be1SHong Zhang       for (l=0; l<nzB; l++) {
170*3f457be1SHong Zhang         if ((ctmp = bmap[cworkB[l]]) < cstart){
171*3f457be1SHong Zhang           vals[lwrite]   = aworkB[l];
172*3f457be1SHong Zhang           cols[lwrite++] = ctmp;
173*3f457be1SHong Zhang           if (rank == prid) printf(" (%d,%g)",ctmp,aworkB[l]);
174*3f457be1SHong Zhang         }
175*3f457be1SHong Zhang       }
176*3f457be1SHong Zhang       for (l=0; l<nzA; l++){
177*3f457be1SHong Zhang         vals[lwrite]   = aworkA[l];
178*3f457be1SHong Zhang         cols[lwrite++] = cstart + cworkA[l];
179*3f457be1SHong Zhang         if (rank == prid) printf(" (%d,%g)",cstart + cworkA[l],aworkA[l]);
180*3f457be1SHong Zhang       }
181*3f457be1SHong Zhang       for (l=0; l<nzB; l++) {
182*3f457be1SHong Zhang         if ((ctmp = bmap[cworkB[l]]) >= cend){
183*3f457be1SHong Zhang           vals[lwrite]   = aworkB[l];
184*3f457be1SHong Zhang           cols[lwrite++] = ctmp;
185*3f457be1SHong Zhang           if (rank == prid) printf(" (%d,%g)",ctmp,aworkB[l]);
186*3f457be1SHong Zhang         }
187*3f457be1SHong Zhang       }
188*3f457be1SHong Zhang       /* insert local matrix values into C */
189*3f457be1SHong Zhang       //ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
190*3f457be1SHong Zhang 
191*3f457be1SHong Zhang       vals += ncols;
192*3f457be1SHong Zhang       cols += ncols;
193*3f457be1SHong Zhang       rptr[i+1] = rptr[i] + ncols;
194*3f457be1SHong Zhang     }
195*3f457be1SHong Zhang     /*
196*3f457be1SHong Zhang     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
197*3f457be1SHong Zhang     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
198*3f457be1SHong Zhang     */
199*3f457be1SHong Zhang     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
200*3f457be1SHong Zhang 
201*3f457be1SHong Zhang   /* send nzlocal to others, and recv other's nzlocal */
202*3f457be1SHong Zhang   /*--------------------------------------------------*/
203*3f457be1SHong Zhang   ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
204*3f457be1SHong Zhang   rbuf_nz = sbuf_nz + nsends;
205*3f457be1SHong Zhang 
206*3f457be1SHong Zhang   ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits1,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
207*3f457be1SHong Zhang   s_waits2 = s_waits1 + nsends;
208*3f457be1SHong Zhang   s_waits3 = s_waits2 + nsends;
209*3f457be1SHong Zhang   r_waits1 = s_waits3 + nsends;
210*3f457be1SHong Zhang   r_waits2 = r_waits1 + nrecvs;
211*3f457be1SHong Zhang   r_waits3 = r_waits2 + nrecvs;
212*3f457be1SHong Zhang 
213*3f457be1SHong Zhang   /* get some new tags to keep the communication clean */
214*3f457be1SHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)A,&tag1);CHKERRQ(ierr);
215*3f457be1SHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)A,&tag2);CHKERRQ(ierr);
216*3f457be1SHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)A,&tag3);CHKERRQ(ierr);
217*3f457be1SHong Zhang 
218*3f457be1SHong Zhang   /* post receives of other's nzlocal */
219*3f457be1SHong Zhang   for (i=0; i<nrecvs; i++){
220*3f457be1SHong Zhang     ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
221*3f457be1SHong Zhang   }
222*3f457be1SHong Zhang 
223*3f457be1SHong Zhang   /* send nzlocal to others */
224*3f457be1SHong Zhang   for (i=0; i<nsends; i++){
225*3f457be1SHong Zhang     sbuf_nz[i] = nzlocal;
226*3f457be1SHong Zhang     ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
227*3f457be1SHong Zhang     if (prid == rank) printf(" [%d] sends nz %d to [%d]\n",rank,nzlocal,send_rank[i]);
228*3f457be1SHong Zhang   }
229*3f457be1SHong Zhang 
230*3f457be1SHong Zhang   /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
231*3f457be1SHong Zhang   count = nrecvs;
232*3f457be1SHong Zhang   while (count) {
233*3f457be1SHong Zhang     ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
234*3f457be1SHong Zhang     recv_rank[imdex] = recv_status.MPI_SOURCE;
235*3f457be1SHong Zhang     /* allocate rbuf_a and rbuf_j; then post receives of rbuf_a and rbuf_j */
236*3f457be1SHong Zhang     ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
237*3f457be1SHong Zhang     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_status.MPI_SOURCE,tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
238*3f457be1SHong Zhang 
239*3f457be1SHong Zhang     itmp = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
240*3f457be1SHong Zhang     rbuf_nz[imdex] += itmp+2;
241*3f457be1SHong Zhang     ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
242*3f457be1SHong Zhang     ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
243*3f457be1SHong Zhang     count--;
244*3f457be1SHong Zhang   }
245*3f457be1SHong Zhang 
246*3f457be1SHong Zhang   /* wait on sends of nzlocal */
247*3f457be1SHong Zhang   if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
248*3f457be1SHong Zhang 
249*3f457be1SHong Zhang   /* send mat->i,j and mat->a to others, and recv from other's */
250*3f457be1SHong Zhang   /*-----------------------------------------------------------*/
251*3f457be1SHong Zhang   for (i=0; i<nsends; i++){
252*3f457be1SHong Zhang     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
253*3f457be1SHong Zhang     itmp = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
254*3f457be1SHong Zhang     ierr = MPI_Isend(sbuf_j,itmp,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
255*3f457be1SHong Zhang   }
256*3f457be1SHong Zhang 
257*3f457be1SHong Zhang   /* wait on receives of mat->i,j and mat->a */
258*3f457be1SHong Zhang   /*-----------------------------------------*/
259*3f457be1SHong Zhang   count = nrecvs;
260*3f457be1SHong Zhang   while (count) {
261*3f457be1SHong Zhang     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
262*3f457be1SHong Zhang     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
263*3f457be1SHong Zhang     ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
264*3f457be1SHong Zhang     count--;
265*3f457be1SHong Zhang   }
266*3f457be1SHong Zhang 
267*3f457be1SHong Zhang   /* wait on sends of mat->i,j and mat->a */
268*3f457be1SHong Zhang   /*--------------------------------------*/
269*3f457be1SHong Zhang   if (nsends) {
270*3f457be1SHong Zhang     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
271*3f457be1SHong Zhang     ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
272*3f457be1SHong Zhang   }
273*3f457be1SHong Zhang   ierr = PetscFree(sbuf_nz);CHKERRQ(ierr);
274*3f457be1SHong Zhang   ierr = PetscFree2(s_waits1,send_status);CHKERRQ(ierr);
275*3f457be1SHong Zhang 
276*3f457be1SHong Zhang   /* create redundant matrix */
277*3f457be1SHong Zhang   /*-------------------------*/
278*3f457be1SHong Zhang   ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
279*3f457be1SHong Zhang   ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
280*3f457be1SHong Zhang   ierr = MatSetFromOptions(C);CHKERRQ(ierr);
281*3f457be1SHong Zhang 
282*3f457be1SHong Zhang   /* insert local matrix entries */
283*3f457be1SHong Zhang   rptr = sbuf_j;
284*3f457be1SHong Zhang   cols = sbuf_j + rend-rstart + 1;
285*3f457be1SHong Zhang   vals = sbuf_a;
286*3f457be1SHong Zhang   for (i=0; i<rend-rstart; i++){
287*3f457be1SHong Zhang     row   = i + rstart;
288*3f457be1SHong Zhang     ncols = rptr[i+1] - rptr[i];
289*3f457be1SHong Zhang     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
290*3f457be1SHong Zhang     vals += ncols;
291*3f457be1SHong Zhang     cols += ncols;
292*3f457be1SHong Zhang   }
293*3f457be1SHong Zhang   /* insert received matrix entries */
294*3f457be1SHong Zhang   for (imdex=0; imdex<nrecvs; imdex++){
295*3f457be1SHong Zhang 
296*3f457be1SHong Zhang     rstart = rowrange[recv_rank[imdex]];
297*3f457be1SHong Zhang     rend   = rowrange[recv_rank[imdex]+1];
298*3f457be1SHong Zhang     rptr = rbuf_j[imdex];
299*3f457be1SHong Zhang     cols = rbuf_j[imdex] + rend-rstart + 1;
300*3f457be1SHong Zhang     vals = rbuf_a[imdex];
301*3f457be1SHong Zhang     for (i=0; i<rend-rstart; i++){
302*3f457be1SHong Zhang       row   = i + rstart;
303*3f457be1SHong Zhang       ncols = rptr[i+1] - rptr[i];
304*3f457be1SHong Zhang       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
305*3f457be1SHong Zhang       vals += ncols;
306*3f457be1SHong Zhang       cols += ncols;
307*3f457be1SHong Zhang     }
308*3f457be1SHong Zhang   }
309*3f457be1SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
310*3f457be1SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
311*3f457be1SHong Zhang   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
312*3f457be1SHong Zhang   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
313*3f457be1SHong Zhang   *matredundant = C;
314*3f457be1SHong Zhang 
315*3f457be1SHong Zhang   /* free space */
316*3f457be1SHong Zhang   ierr = PetscFree(send_rank);CHKERRQ(ierr);
317*3f457be1SHong Zhang   ierr = PetscFree(sbuf_j);CHKERRQ(ierr);
318*3f457be1SHong Zhang   ierr = PetscFree(sbuf_a);CHKERRQ(ierr);
319*3f457be1SHong Zhang   for (i=0; i<nrecvs; i++){
320*3f457be1SHong Zhang     ierr = PetscFree(rbuf_j[i]);CHKERRQ(ierr);
321*3f457be1SHong Zhang     ierr = PetscFree(rbuf_a[i]);CHKERRQ(ierr);
322*3f457be1SHong Zhang   }
323*3f457be1SHong Zhang   ierr = PetscFree3(sbuf_nz,rbuf_j,rbuf_a);CHKERRQ(ierr);
324*3f457be1SHong Zhang   PetscFunctionReturn(0);
325*3f457be1SHong Zhang }
326*3f457be1SHong Zhang 
3274b9ad928SBarry Smith #undef __FUNCT__
3284b9ad928SBarry Smith #define __FUNCT__ "PCSetUp_Redundant"
3296849ba73SBarry Smith static PetscErrorCode PCSetUp_Redundant(PC pc)
3304b9ad928SBarry Smith {
3314b9ad928SBarry Smith   PC_Redundant   *red  = (PC_Redundant*)pc->data;
332dfbe8321SBarry Smith   PetscErrorCode ierr;
333*3f457be1SHong Zhang   PetscInt       mstart,mend,mlocal,m;
33413f74950SBarry Smith   PetscMPIInt    size;
3354b9ad928SBarry Smith   IS             isl;
3364b9ad928SBarry Smith   MatReuse       reuse = MAT_INITIAL_MATRIX;
3374b9ad928SBarry Smith   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
3384b9ad928SBarry Smith   MPI_Comm       comm;
33923ce1328SBarry Smith   Vec            vec;
3404b9ad928SBarry Smith 
341*3f457be1SHong Zhang   PetscMPIInt rank,size_sub,itmp;
342*3f457be1SHong Zhang   PetscInt    mlocal_sub;
343*3f457be1SHong Zhang   PetscMPIInt subsize,subrank;
344*3f457be1SHong Zhang   PetscInt    rstart_sub,rend_sub,mloc_sub;
345*3f457be1SHong Zhang   Mat         Aredundant;
346*3f457be1SHong Zhang 
3474b9ad928SBarry Smith   PetscFunctionBegin;
348*3f457be1SHong Zhang   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
34923ce1328SBarry Smith   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
3504b9ad928SBarry Smith   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
35123ce1328SBarry Smith   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
3524b9ad928SBarry Smith   if (!pc->setupcalled) {
353*3f457be1SHong Zhang     /* create working vectors xsub/ysub and xdup/ydup */
35423ce1328SBarry Smith     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
355*3f457be1SHong Zhang     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
3564b9ad928SBarry Smith 
357*3f457be1SHong Zhang #ifdef INTER_COLOR
358*3f457be1SHong Zhang     /* get local size of xsub/ysub */
359*3f457be1SHong Zhang     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
360*3f457be1SHong Zhang     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
361*3f457be1SHong Zhang     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
362*3f457be1SHong Zhang     if (subrank+1 < subsize){
363*3f457be1SHong Zhang       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
364*3f457be1SHong Zhang     } else {
365*3f457be1SHong Zhang       rend_sub = m;
366*3f457be1SHong Zhang     }
367*3f457be1SHong Zhang     mloc_sub = rend_sub - rstart_sub;
368*3f457be1SHong Zhang     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
369*3f457be1SHong Zhang     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
370*3f457be1SHong Zhang     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
371*3f457be1SHong Zhang #endif
372*3f457be1SHong Zhang #ifdef CONTIGUOUS_COLOR
373*3f457be1SHong Zhang     ierr = VecCreateMPI(red->subcomm,PETSC_DECIDE,m,&red->ysub);CHKERRQ(ierr);
374*3f457be1SHong Zhang     ierr = VecGetLocalSize(red->ysub,&mloc_sub);CHKERRQ(ierr);
375*3f457be1SHong Zhang     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,m,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
376*3f457be1SHong Zhang #endif
377*3f457be1SHong Zhang 
378*3f457be1SHong Zhang     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
379*3f457be1SHong Zhang        Note: we use communicator dupcomm, not pc->comm! */
380*3f457be1SHong Zhang     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
381*3f457be1SHong Zhang     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
382*3f457be1SHong Zhang 
383*3f457be1SHong Zhang     /* create vec scatters */
384*3f457be1SHong Zhang     if (!red->scatterin){
385*3f457be1SHong Zhang       IS       is1,is2;
386*3f457be1SHong Zhang       PetscInt *idx1,*idx2,i,j,k;
387*3f457be1SHong Zhang       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
388*3f457be1SHong Zhang       idx2 = idx1 + red->nsubcomm*mlocal;
389*3f457be1SHong Zhang       j = 0;
390*3f457be1SHong Zhang       for (k=0; k<red->nsubcomm; k++){
391*3f457be1SHong Zhang         for (i=mstart; i<mend; i++){
392*3f457be1SHong Zhang           idx1[j]   = i;
393*3f457be1SHong Zhang           idx2[j++] = i + m*k;
394*3f457be1SHong Zhang         }
395*3f457be1SHong Zhang       }
396*3f457be1SHong Zhang       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
397*3f457be1SHong Zhang       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
398*3f457be1SHong Zhang       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
399*3f457be1SHong Zhang       ierr = ISDestroy(is1);CHKERRQ(ierr);
400*3f457be1SHong Zhang       ierr = ISDestroy(is2);CHKERRQ(ierr);
401*3f457be1SHong Zhang 
402*3f457be1SHong Zhang       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
403*3f457be1SHong Zhang       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
404*3f457be1SHong Zhang       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
405*3f457be1SHong Zhang       ierr = ISDestroy(is1);CHKERRQ(ierr);
406*3f457be1SHong Zhang       ierr = ISDestroy(is2);CHKERRQ(ierr);
407*3f457be1SHong Zhang       ierr = PetscFree(idx1);CHKERRQ(ierr);
4084b9ad928SBarry Smith     }
4094b9ad928SBarry Smith   }
41023ce1328SBarry Smith   ierr = VecDestroy(vec);CHKERRQ(ierr);
4114b9ad928SBarry Smith 
4124b9ad928SBarry Smith   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
413*3f457be1SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4144b9ad928SBarry Smith   if (size == 1) {
4154b9ad928SBarry Smith     red->useparallelmat = PETSC_FALSE;
4164b9ad928SBarry Smith   }
4174b9ad928SBarry Smith 
4184b9ad928SBarry Smith   if (red->useparallelmat) {
4194b9ad928SBarry Smith     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
4204b9ad928SBarry Smith       /* destroy old matrices */
4214b9ad928SBarry Smith       if (red->pmats) {
4224b9ad928SBarry Smith         ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
4234b9ad928SBarry Smith       }
4244b9ad928SBarry Smith     } else if (pc->setupcalled == 1) {
4254b9ad928SBarry Smith       reuse = MAT_REUSE_MATRIX;
4264b9ad928SBarry Smith       str   = SAME_NONZERO_PATTERN;
4274b9ad928SBarry Smith     }
4284b9ad928SBarry Smith 
429*3f457be1SHong Zhang     /* ================== matrix ============= */
430*3f457be1SHong Zhang     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
431*3f457be1SHong Zhang     ierr = MatGetRedundantMatrix_AIJ(pc,pc->pmat,red->subcomm,mlocal_sub,&Aredundant);CHKERRQ(ierr);
432*3f457be1SHong Zhang 
433*3f457be1SHong Zhang     /* grab the parallel matrix and put it into processors of a subcomminicator */
4344b9ad928SBarry Smith     ierr = ISCreateStride(PETSC_COMM_SELF,m,0,1,&isl);CHKERRQ(ierr);
4354b9ad928SBarry Smith     ierr = MatGetSubMatrices(pc->pmat,1,&isl,&isl,reuse,&red->pmats);CHKERRQ(ierr);
4364b9ad928SBarry Smith     ierr = ISDestroy(isl);CHKERRQ(ierr);
4374b9ad928SBarry Smith 
438*3f457be1SHong Zhang     /* ------- temporarily set a mpi matrix pmats_sub- provided by user! --*/
439*3f457be1SHong Zhang     ierr = MatCreate(red->subcomm,&red->pmats_sub);CHKERRQ(ierr);
440*3f457be1SHong Zhang     ierr = MatSetSizes(red->pmats_sub,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
441*3f457be1SHong Zhang     ierr = MatSetFromOptions(red->pmats_sub);CHKERRQ(ierr);
442*3f457be1SHong Zhang     {
443*3f457be1SHong Zhang       PetscInt          Istart,Iend,ncols,i;
444*3f457be1SHong Zhang       const PetscInt    *cols;
445*3f457be1SHong Zhang       const PetscScalar *vals;
446*3f457be1SHong Zhang       PetscTruth flg;
447*3f457be1SHong Zhang       ierr = MatGetOwnershipRange(red->pmats_sub,&Istart,&Iend);CHKERRQ(ierr);
448*3f457be1SHong Zhang       for (i=Istart; i<Iend; i++) {
449*3f457be1SHong Zhang         ierr = MatGetRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr);
450*3f457be1SHong Zhang         ierr = MatSetValues(red->pmats_sub,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
451*3f457be1SHong Zhang         ierr = MatRestoreRow(red->pmats[0],i,&ncols,&cols,&vals);CHKERRQ(ierr);
452*3f457be1SHong Zhang       }
453*3f457be1SHong Zhang       ierr = MatAssemblyBegin(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
454*3f457be1SHong Zhang       ierr = MatAssemblyEnd(red->pmats_sub,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
455*3f457be1SHong Zhang 
456*3f457be1SHong Zhang       ierr = MatEqual(red->pmats_sub,Aredundant,&flg);CHKERRQ(ierr);
457*3f457be1SHong Zhang       if (!flg) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"pmats_sub !=Aredundant ");
458*3f457be1SHong Zhang       ierr = MatDestroy(red->pmats_sub);
459*3f457be1SHong Zhang     }
460*3f457be1SHong Zhang     red->pmats_sub = Aredundant;
461*3f457be1SHong Zhang 
462*3f457be1SHong Zhang     /* tell PC of the subcommunicator its operators */
463*3f457be1SHong Zhang     ierr = PCSetOperators(red->pc,red->pmats_sub,red->pmats_sub,str);CHKERRQ(ierr);
4644b9ad928SBarry Smith   } else {
4654b9ad928SBarry Smith     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
4664b9ad928SBarry Smith   }
4674b9ad928SBarry Smith   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
4684b9ad928SBarry Smith   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
4694b9ad928SBarry Smith   PetscFunctionReturn(0);
4704b9ad928SBarry Smith }
4714b9ad928SBarry Smith 
4724b9ad928SBarry Smith 
4734b9ad928SBarry Smith #undef __FUNCT__
4744b9ad928SBarry Smith #define __FUNCT__ "PCApply_Redundant"
4756849ba73SBarry Smith static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
4764b9ad928SBarry Smith {
4774b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
478dfbe8321SBarry Smith   PetscErrorCode ierr;
479*3f457be1SHong Zhang   PetscScalar    *array;
4804b9ad928SBarry Smith 
4814b9ad928SBarry Smith   PetscFunctionBegin;
482*3f457be1SHong Zhang   /* scatter x to xdup */
483*3f457be1SHong Zhang   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
484*3f457be1SHong Zhang   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
485*3f457be1SHong Zhang 
486*3f457be1SHong Zhang   /* place xdup's local array into xsub */
487*3f457be1SHong Zhang   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
488*3f457be1SHong Zhang   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
4894b9ad928SBarry Smith 
4904b9ad928SBarry Smith   /* apply preconditioner on each processor */
491*3f457be1SHong Zhang   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
492*3f457be1SHong Zhang   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
493*3f457be1SHong Zhang   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
4944b9ad928SBarry Smith 
495*3f457be1SHong Zhang   /* place ysub's local array into ydup */
496*3f457be1SHong Zhang   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
497*3f457be1SHong Zhang   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
498*3f457be1SHong Zhang 
499*3f457be1SHong Zhang   /* scatter ydup to y */
500*3f457be1SHong Zhang   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
501*3f457be1SHong Zhang   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
502*3f457be1SHong Zhang   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
503*3f457be1SHong Zhang   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
5044b9ad928SBarry Smith   PetscFunctionReturn(0);
5054b9ad928SBarry Smith }
5064b9ad928SBarry Smith 
5074b9ad928SBarry Smith #undef __FUNCT__
5084b9ad928SBarry Smith #define __FUNCT__ "PCDestroy_Redundant"
5096849ba73SBarry Smith static PetscErrorCode PCDestroy_Redundant(PC pc)
5104b9ad928SBarry Smith {
5114b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
512dfbe8321SBarry Smith   PetscErrorCode ierr;
5134b9ad928SBarry Smith 
5144b9ad928SBarry Smith   PetscFunctionBegin;
5154b9ad928SBarry Smith   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
5164b9ad928SBarry Smith   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
517*3f457be1SHong Zhang   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
518*3f457be1SHong Zhang   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
519*3f457be1SHong Zhang   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
520*3f457be1SHong Zhang   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
521*3f457be1SHong Zhang   if (red->pmats_sub) {
522*3f457be1SHong Zhang     ierr = MatDestroy(red->pmats_sub);CHKERRQ(ierr);
523*3f457be1SHong Zhang   }
524*3f457be1SHong Zhang 
5254b9ad928SBarry Smith   if (red->pmats) {
5264b9ad928SBarry Smith     ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
5274b9ad928SBarry Smith   }
5284b9ad928SBarry Smith   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
5294b9ad928SBarry Smith   ierr = PetscFree(red);CHKERRQ(ierr);
5304b9ad928SBarry Smith   PetscFunctionReturn(0);
5314b9ad928SBarry Smith }
5324b9ad928SBarry Smith 
5334b9ad928SBarry Smith #undef __FUNCT__
5344b9ad928SBarry Smith #define __FUNCT__ "PCSetFromOptions_Redundant"
5356849ba73SBarry Smith static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
5364b9ad928SBarry Smith {
5374b9ad928SBarry Smith   PetscFunctionBegin;
5384b9ad928SBarry Smith   PetscFunctionReturn(0);
5394b9ad928SBarry Smith }
5404b9ad928SBarry Smith 
5414b9ad928SBarry Smith EXTERN_C_BEGIN
5424b9ad928SBarry Smith #undef __FUNCT__
5434b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter_Redundant"
544dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
5454b9ad928SBarry Smith {
5464b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
547dfbe8321SBarry Smith   PetscErrorCode ierr;
5484b9ad928SBarry Smith 
5494b9ad928SBarry Smith   PetscFunctionBegin;
5504b9ad928SBarry Smith   red->scatterin  = in;
5514b9ad928SBarry Smith   red->scatterout = out;
5524b9ad928SBarry Smith   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
5534b9ad928SBarry Smith   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
5544b9ad928SBarry Smith   PetscFunctionReturn(0);
5554b9ad928SBarry Smith }
5564b9ad928SBarry Smith EXTERN_C_END
5574b9ad928SBarry Smith 
5584b9ad928SBarry Smith #undef __FUNCT__
5594b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter"
5604b9ad928SBarry Smith /*@
5614b9ad928SBarry Smith    PCRedundantSetScatter - Sets the scatter used to copy values into the
5624b9ad928SBarry Smith      redundant local solve and the scatter to move them back into the global
5634b9ad928SBarry Smith      vector.
5644b9ad928SBarry Smith 
5654b9ad928SBarry Smith    Collective on PC
5664b9ad928SBarry Smith 
5674b9ad928SBarry Smith    Input Parameters:
5684b9ad928SBarry Smith +  pc - the preconditioner context
5694b9ad928SBarry Smith .  in - the scatter to move the values in
5704b9ad928SBarry Smith -  out - the scatter to move them out
5714b9ad928SBarry Smith 
5724b9ad928SBarry Smith    Level: advanced
5734b9ad928SBarry Smith 
5744b9ad928SBarry Smith .keywords: PC, redundant solve
5754b9ad928SBarry Smith @*/
576dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
5774b9ad928SBarry Smith {
578dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
5794b9ad928SBarry Smith 
5804b9ad928SBarry Smith   PetscFunctionBegin;
5814482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
5824482741eSBarry Smith   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
5834482741eSBarry Smith   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
5844b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
5854b9ad928SBarry Smith   if (f) {
5864b9ad928SBarry Smith     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
5874b9ad928SBarry Smith   }
5884b9ad928SBarry Smith   PetscFunctionReturn(0);
5894b9ad928SBarry Smith }
5904b9ad928SBarry Smith 
5914b9ad928SBarry Smith EXTERN_C_BEGIN
5924b9ad928SBarry Smith #undef __FUNCT__
5934b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC_Redundant"
594dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
5954b9ad928SBarry Smith {
5964b9ad928SBarry Smith   PC_Redundant *red = (PC_Redundant*)pc->data;
5974b9ad928SBarry Smith 
5984b9ad928SBarry Smith   PetscFunctionBegin;
5994b9ad928SBarry Smith   *innerpc = red->pc;
6004b9ad928SBarry Smith   PetscFunctionReturn(0);
6014b9ad928SBarry Smith }
6024b9ad928SBarry Smith EXTERN_C_END
6034b9ad928SBarry Smith 
6044b9ad928SBarry Smith #undef __FUNCT__
6054b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC"
6064b9ad928SBarry Smith /*@
6074b9ad928SBarry Smith    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
6084b9ad928SBarry Smith 
6094b9ad928SBarry Smith    Not Collective
6104b9ad928SBarry Smith 
6114b9ad928SBarry Smith    Input Parameter:
6124b9ad928SBarry Smith .  pc - the preconditioner context
6134b9ad928SBarry Smith 
6144b9ad928SBarry Smith    Output Parameter:
6154b9ad928SBarry Smith .  innerpc - the sequential PC
6164b9ad928SBarry Smith 
6174b9ad928SBarry Smith    Level: advanced
6184b9ad928SBarry Smith 
6194b9ad928SBarry Smith .keywords: PC, redundant solve
6204b9ad928SBarry Smith @*/
621dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
6224b9ad928SBarry Smith {
623dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,PC*);
6244b9ad928SBarry Smith 
6254b9ad928SBarry Smith   PetscFunctionBegin;
6264482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
6274482741eSBarry Smith   PetscValidPointer(innerpc,2);
6284b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
6294b9ad928SBarry Smith   if (f) {
6304b9ad928SBarry Smith     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
6314b9ad928SBarry Smith   }
6324b9ad928SBarry Smith   PetscFunctionReturn(0);
6334b9ad928SBarry Smith }
6344b9ad928SBarry Smith 
6354b9ad928SBarry Smith EXTERN_C_BEGIN
6364b9ad928SBarry Smith #undef __FUNCT__
6374b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators_Redundant"
638dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
6394b9ad928SBarry Smith {
6404b9ad928SBarry Smith   PC_Redundant *red = (PC_Redundant*)pc->data;
6414b9ad928SBarry Smith 
6424b9ad928SBarry Smith   PetscFunctionBegin;
6434b9ad928SBarry Smith   if (mat)  *mat  = red->pmats[0];
6444b9ad928SBarry Smith   if (pmat) *pmat = red->pmats[0];
6454b9ad928SBarry Smith   PetscFunctionReturn(0);
6464b9ad928SBarry Smith }
6474b9ad928SBarry Smith EXTERN_C_END
6484b9ad928SBarry Smith 
6494b9ad928SBarry Smith #undef __FUNCT__
6504b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators"
6514b9ad928SBarry Smith /*@
6524b9ad928SBarry Smith    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
6534b9ad928SBarry Smith 
6544b9ad928SBarry Smith    Not Collective
6554b9ad928SBarry Smith 
6564b9ad928SBarry Smith    Input Parameter:
6574b9ad928SBarry Smith .  pc - the preconditioner context
6584b9ad928SBarry Smith 
6594b9ad928SBarry Smith    Output Parameters:
6604b9ad928SBarry Smith +  mat - the matrix
6614b9ad928SBarry Smith -  pmat - the (possibly different) preconditioner matrix
6624b9ad928SBarry Smith 
6634b9ad928SBarry Smith    Level: advanced
6644b9ad928SBarry Smith 
6654b9ad928SBarry Smith .keywords: PC, redundant solve
6664b9ad928SBarry Smith @*/
667dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
6684b9ad928SBarry Smith {
669dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
6704b9ad928SBarry Smith 
6714b9ad928SBarry Smith   PetscFunctionBegin;
6724482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
6734482741eSBarry Smith   if (mat)  PetscValidPointer(mat,2);
6744482741eSBarry Smith   if (pmat) PetscValidPointer(pmat,3);
6754b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
6764b9ad928SBarry Smith   if (f) {
6774b9ad928SBarry Smith     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
6784b9ad928SBarry Smith   }
6794b9ad928SBarry Smith   PetscFunctionReturn(0);
6804b9ad928SBarry Smith }
6814b9ad928SBarry Smith 
6824b9ad928SBarry Smith /* -------------------------------------------------------------------------------------*/
68337a17b4dSBarry Smith /*MC
68437a17b4dSBarry Smith      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor
68537a17b4dSBarry Smith 
68637a17b4dSBarry Smith 
68737a17b4dSBarry Smith      Options for the redundant preconditioners can be set with -redundant_pc_xxx
68837a17b4dSBarry Smith 
68937a17b4dSBarry Smith    Level: intermediate
69037a17b4dSBarry Smith 
69137a17b4dSBarry Smith 
69237a17b4dSBarry Smith .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
69337a17b4dSBarry Smith            PCRedundantGetPC(), PCRedundantGetOperators()
69437a17b4dSBarry Smith 
69537a17b4dSBarry Smith M*/
69637a17b4dSBarry Smith 
6974b9ad928SBarry Smith EXTERN_C_BEGIN
6984b9ad928SBarry Smith #undef __FUNCT__
6994b9ad928SBarry Smith #define __FUNCT__ "PCCreate_Redundant"
700dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
7014b9ad928SBarry Smith {
702dfbe8321SBarry Smith   PetscErrorCode ierr;
7034b9ad928SBarry Smith   PC_Redundant   *red;
704e060cb09SBarry Smith   const char     *prefix;
7054b9ad928SBarry Smith 
706*3f457be1SHong Zhang   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
707*3f457be1SHong Zhang   PetscMPIInt    rank,size,subrank,*subsize;
708*3f457be1SHong Zhang   MPI_Comm       subcomm;
709*3f457be1SHong Zhang   PetscMPIInt    duprank;
710*3f457be1SHong Zhang   PetscMPIInt    rank_dup,size_dup;
711*3f457be1SHong Zhang   MPI_Comm       dupcomm;
712*3f457be1SHong Zhang 
7134b9ad928SBarry Smith   PetscFunctionBegin;
7144b9ad928SBarry Smith   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
71552e6d16bSBarry Smith   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
7164b9ad928SBarry Smith   red->useparallelmat   = PETSC_TRUE;
7174b9ad928SBarry Smith 
718*3f457be1SHong Zhang   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
719*3f457be1SHong Zhang   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
720*3f457be1SHong Zhang   nsubcomm = size;
721*3f457be1SHong Zhang   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
722*3f457be1SHong Zhang   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
723*3f457be1SHong Zhang 
724*3f457be1SHong Zhang   /* get size of each subcommunicators */
725*3f457be1SHong Zhang   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
726*3f457be1SHong Zhang   np_subcomm = size/nsubcomm;
727*3f457be1SHong Zhang   nleftover  = size - nsubcomm*np_subcomm;
728*3f457be1SHong Zhang   for (i=0; i<nsubcomm; i++){
729*3f457be1SHong Zhang     subsize[i] = np_subcomm;
730*3f457be1SHong Zhang     if (i<nleftover) subsize[i]++;
731*3f457be1SHong Zhang   }
732*3f457be1SHong Zhang 
733*3f457be1SHong Zhang   /* find color for this proc */
734*3f457be1SHong Zhang #ifdef INTER_COLOR
735*3f457be1SHong Zhang   color   = rank%nsubcomm;
736*3f457be1SHong Zhang   subrank = rank/nsubcomm;
737*3f457be1SHong Zhang #endif
738*3f457be1SHong Zhang 
739*3f457be1SHong Zhang #ifdef CONTIGUOUS_COLOR
740*3f457be1SHong Zhang   color = 0; subrank = 0; i = 0; j=0;
741*3f457be1SHong Zhang   while (i < size){
742*3f457be1SHong Zhang     if (rank == i) break; /* my color is found */
743*3f457be1SHong Zhang     if (j >= subsize[color]-1){ /* next subcomm */
744*3f457be1SHong Zhang       j = -1; subrank = -1; color++;
745*3f457be1SHong Zhang     }
746*3f457be1SHong Zhang     i++; j++; subrank++;
747*3f457be1SHong Zhang   }
748*3f457be1SHong Zhang #endif
749*3f457be1SHong Zhang 
750*3f457be1SHong Zhang   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
751*3f457be1SHong Zhang   red->subcomm  = subcomm;
752*3f457be1SHong Zhang   red->color    = color;
753*3f457be1SHong Zhang   red->nsubcomm = nsubcomm;
754*3f457be1SHong Zhang 
755*3f457be1SHong Zhang   j = 0; duprank = 0;
756*3f457be1SHong Zhang   for (i=0; i<nsubcomm; i++){
757*3f457be1SHong Zhang     if (j == color){
758*3f457be1SHong Zhang       duprank += subrank;
759*3f457be1SHong Zhang       break;
760*3f457be1SHong Zhang     }
761*3f457be1SHong Zhang     duprank += subsize[i]; j++;
762*3f457be1SHong Zhang   }
763*3f457be1SHong Zhang   /*
764*3f457be1SHong Zhang   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank);
765*3f457be1SHong Zhang   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
766*3f457be1SHong Zhang   */
767*3f457be1SHong Zhang 
768*3f457be1SHong Zhang   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
769*3f457be1SHong Zhang   ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr);
770*3f457be1SHong Zhang   ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr);
771*3f457be1SHong Zhang   /*
772*3f457be1SHong Zhang   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank);
773*3f457be1SHong Zhang   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
774*3f457be1SHong Zhang   */
775*3f457be1SHong Zhang   red->dupcomm = dupcomm;
776*3f457be1SHong Zhang   ierr = PetscFree(subsize);CHKERRQ(ierr);
777*3f457be1SHong Zhang 
7784b9ad928SBarry Smith   /* create the sequential PC that each processor has copy of */
779*3f457be1SHong Zhang   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
7804b9ad928SBarry Smith   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
7814b9ad928SBarry Smith   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
7824b9ad928SBarry Smith   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
7834b9ad928SBarry Smith   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
7844b9ad928SBarry Smith 
7854b9ad928SBarry Smith   pc->ops->apply             = PCApply_Redundant;
7864b9ad928SBarry Smith   pc->ops->applytranspose    = 0;
7874b9ad928SBarry Smith   pc->ops->setup             = PCSetUp_Redundant;
7884b9ad928SBarry Smith   pc->ops->destroy           = PCDestroy_Redundant;
7894b9ad928SBarry Smith   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
7904b9ad928SBarry Smith   pc->ops->setuponblocks     = 0;
7914b9ad928SBarry Smith   pc->ops->view              = PCView_Redundant;
7924b9ad928SBarry Smith   pc->ops->applyrichardson   = 0;
7934b9ad928SBarry Smith 
7944b9ad928SBarry Smith   pc->data     = (void*)red;
7954b9ad928SBarry Smith 
7964b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
7974b9ad928SBarry Smith                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
7984b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
7994b9ad928SBarry Smith                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
8004b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
8014b9ad928SBarry Smith                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
8024b9ad928SBarry Smith 
8034b9ad928SBarry Smith   PetscFunctionReturn(0);
8044b9ad928SBarry Smith }
8054b9ad928SBarry Smith EXTERN_C_END
806