xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision b3804887f4a4eb3efd03445267ed7f92fe61abdd)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
2dba47a55SKris Buschelman 
34b9ad928SBarry Smith /*
43f457be1SHong Zhang   This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner.
54b9ad928SBarry Smith */
66356e834SBarry Smith #include "private/pcimpl.h"     /*I "petscpc.h" I*/
74b9ad928SBarry Smith #include "petscksp.h"
84b9ad928SBarry Smith 
94b9ad928SBarry Smith typedef struct {
104b9ad928SBarry Smith   PC         pc;                   /* actual preconditioner used on each processor */
113f457be1SHong Zhang   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
123f457be1SHong Zhang   Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
13*b3804887SHong Zhang   Mat        pmats;                /* matrix and optional preconditioner matrix belong to a subcommunicator */
143f457be1SHong Zhang   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
154b9ad928SBarry Smith   PetscTruth useparallelmat;
163f457be1SHong Zhang   MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
173f457be1SHong Zhang   MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
183f457be1SHong Zhang                                       that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
193f457be1SHong Zhang   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
203f457be1SHong Zhang   PetscInt   color;                /* color of processors in a subcommunicator */
214b9ad928SBarry Smith } PC_Redundant;
224b9ad928SBarry Smith 
234b9ad928SBarry Smith #undef __FUNCT__
244b9ad928SBarry Smith #define __FUNCT__ "PCView_Redundant"
256849ba73SBarry Smith static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
264b9ad928SBarry Smith {
274b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
28dfbe8321SBarry Smith   PetscErrorCode ierr;
2913f74950SBarry Smith   PetscMPIInt    rank;
3032077d6dSBarry Smith   PetscTruth     iascii,isstring;
314b9ad928SBarry Smith   PetscViewer    sviewer;
324b9ad928SBarry Smith 
334b9ad928SBarry Smith   PetscFunctionBegin;
344b9ad928SBarry Smith   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
3532077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
364b9ad928SBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
3732077d6dSBarry Smith   if (iascii) {
384b9ad928SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
394b9ad928SBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
404b9ad928SBarry Smith     if (!rank) {
414b9ad928SBarry Smith       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
424b9ad928SBarry Smith       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
434b9ad928SBarry Smith       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
444b9ad928SBarry Smith     }
454b9ad928SBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
464b9ad928SBarry Smith   } else if (isstring) {
474b9ad928SBarry Smith     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
484b9ad928SBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
494b9ad928SBarry Smith     if (!rank) {
504b9ad928SBarry Smith       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
514b9ad928SBarry Smith     }
524b9ad928SBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
534b9ad928SBarry Smith   } else {
5479a5c55eSBarry Smith     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
554b9ad928SBarry Smith   }
564b9ad928SBarry Smith   PetscFunctionReturn(0);
574b9ad928SBarry Smith }
584b9ad928SBarry Smith 
593f457be1SHong Zhang #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
603f457be1SHong Zhang #include "private/vecimpl.h"
613f457be1SHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
623f457be1SHong Zhang #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
633f457be1SHong Zhang 
64*b3804887SHong Zhang typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
65*b3804887SHong Zhang   PetscInt       nzlocal,nsends,nrecvs;
66*b3804887SHong Zhang   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
67*b3804887SHong Zhang   PetscScalar    *sbuf_a,**rbuf_a;
68*b3804887SHong Zhang   PetscErrorCode (*MatDestroy)(Mat);
69*b3804887SHong Zhang } Mat_Redundant;
70*b3804887SHong Zhang 
71*b3804887SHong Zhang #undef __FUNCT__
72*b3804887SHong Zhang #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant"
73*b3804887SHong Zhang PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr)
74*b3804887SHong Zhang {
75*b3804887SHong Zhang   PetscErrorCode       ierr;
76*b3804887SHong Zhang   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
77*b3804887SHong Zhang   PetscInt             i;
78*b3804887SHong Zhang 
79*b3804887SHong Zhang   PetscFunctionBegin;
80*b3804887SHong Zhang   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
81*b3804887SHong Zhang   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
82*b3804887SHong Zhang   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
83*b3804887SHong Zhang   for (i=0; i<redund->nrecvs; i++){
84*b3804887SHong Zhang     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
85*b3804887SHong Zhang     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
86*b3804887SHong Zhang   }
87*b3804887SHong Zhang   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
88*b3804887SHong Zhang   ierr = PetscFree(redund);CHKERRQ(ierr);
89*b3804887SHong Zhang   PetscFunctionReturn(0);
90*b3804887SHong Zhang }
91*b3804887SHong Zhang 
92*b3804887SHong Zhang #undef __FUNCT__
93*b3804887SHong Zhang #define __FUNCT__ "MatDestroy_MatRedundant"
94*b3804887SHong Zhang PetscErrorCode MatDestroy_MatRedundant(Mat A)
95*b3804887SHong Zhang {
96*b3804887SHong Zhang   PetscErrorCode       ierr;
97*b3804887SHong Zhang   PetscObjectContainer container;
98*b3804887SHong Zhang   Mat_Redundant        *redund=PETSC_NULL;
99*b3804887SHong Zhang 
100*b3804887SHong Zhang   PetscFunctionBegin;
101*b3804887SHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
102*b3804887SHong Zhang   if (container) {
103*b3804887SHong Zhang     ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
104*b3804887SHong Zhang   } else {
105*b3804887SHong Zhang     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
106*b3804887SHong Zhang   }
107*b3804887SHong Zhang   A->ops->destroy = redund->MatDestroy;
108*b3804887SHong Zhang   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
109*b3804887SHong Zhang   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
110*b3804887SHong Zhang   ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
111*b3804887SHong Zhang   PetscFunctionReturn(0);
112*b3804887SHong Zhang }
113*b3804887SHong Zhang 
1143f457be1SHong Zhang #undef __FUNCT__
1153f457be1SHong Zhang #define __FUNCT__ "MatGetRedundantMatrix"
116f664ae05SHong Zhang PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1173f457be1SHong Zhang {
118*b3804887SHong Zhang   PetscMPIInt    rank,size;
1193f457be1SHong Zhang   MPI_Comm       comm=mat->comm;
1203f457be1SHong Zhang   PetscErrorCode ierr;
121*b3804887SHong Zhang   PetscInt       nsends,nrecvs,i,prid=100,rownz_max;
1223f457be1SHong Zhang   PetscMPIInt    *send_rank,*recv_rank;
123*b3804887SHong Zhang   PetscInt       *rowrange=mat->rmap.range;
1243f457be1SHong Zhang   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
125*b3804887SHong Zhang   Mat            A=aij->A,B=aij->B,C=*matredundant;
1263f457be1SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1273f457be1SHong Zhang   PetscScalar    *sbuf_a;
128*b3804887SHong Zhang   PetscInt       nzlocal=a->nz+b->nz;
129*b3804887SHong Zhang   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1303f457be1SHong Zhang   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
131*b3804887SHong Zhang   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1323f457be1SHong Zhang   PetscScalar    *vals,*aworkA,*aworkB;
1333f457be1SHong Zhang   PetscMPIInt    tag1,tag2,tag3,imdex;
1343f457be1SHong Zhang   MPI_Request    *s_waits1,*s_waits2,*s_waits3,*r_waits1,*r_waits2,*r_waits3;
1353f457be1SHong Zhang   MPI_Status     recv_status,*send_status;
1363f457be1SHong Zhang   PetscInt       *sbuf_nz,*rbuf_nz,count;
1373f457be1SHong Zhang   PetscInt       **rbuf_j;
1383f457be1SHong Zhang   PetscScalar    **rbuf_a;
139*b3804887SHong Zhang   Mat_Redundant  *redund=PETSC_NULL;
140*b3804887SHong Zhang   PetscObjectContainer container;
1413f457be1SHong Zhang 
1423f457be1SHong Zhang   PetscFunctionBegin;
1433f457be1SHong Zhang   ierr = PetscOptionsGetInt(PETSC_NULL,"-prid",&prid,PETSC_NULL);CHKERRQ(ierr);
1443f457be1SHong Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1453f457be1SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
146*b3804887SHong Zhang 
147*b3804887SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
148*b3804887SHong Zhang     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
149*b3804887SHong Zhang     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
150*b3804887SHong Zhang     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
151*b3804887SHong Zhang     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
152*b3804887SHong Zhang     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
153*b3804887SHong Zhang     if (container) {
154*b3804887SHong Zhang       ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
155*b3804887SHong Zhang     } else {
156*b3804887SHong Zhang       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
157*b3804887SHong Zhang     }
158*b3804887SHong Zhang     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
159*b3804887SHong Zhang 
160*b3804887SHong Zhang     nsends    = redund->nsends;
161*b3804887SHong Zhang     nrecvs    = redund->nrecvs;
162*b3804887SHong Zhang     send_rank = redund->send_rank; recv_rank = send_rank + size;
163*b3804887SHong Zhang     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
164*b3804887SHong Zhang     sbuf_j    = redund->sbuf_j;
165*b3804887SHong Zhang     sbuf_a    = redund->sbuf_a;
166*b3804887SHong Zhang     rbuf_j    = redund->rbuf_j;
167*b3804887SHong Zhang     rbuf_a    = redund->rbuf_a;
168*b3804887SHong Zhang   }
169*b3804887SHong Zhang 
170*b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
171*b3804887SHong Zhang     PetscMPIInt  subrank,subsize;
172*b3804887SHong Zhang     PetscInt     nleftover,np_subcomm;
173*b3804887SHong Zhang     /* get the destination processors' id send_rank, nsends and nrecvs */
1743f457be1SHong Zhang     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1753f457be1SHong Zhang     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1763f457be1SHong Zhang     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1773f457be1SHong Zhang     recv_rank = send_rank + size;
1783f457be1SHong Zhang     np_subcomm = size/nsubcomm;
1793f457be1SHong Zhang     nleftover  = size - nsubcomm*np_subcomm;
1803f457be1SHong Zhang     nsends = 0; nrecvs = 0;
1813f457be1SHong Zhang     for (i=0; i<size; i++){ /* i=rank*/
1823f457be1SHong Zhang       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1833f457be1SHong Zhang         send_rank[nsends] = i; nsends++;
1843f457be1SHong Zhang         recv_rank[nrecvs++] = i;
1853f457be1SHong Zhang       }
1863f457be1SHong Zhang     }
1873f457be1SHong Zhang     if (rank >= size - nleftover){/* this proc is a leftover processor */
1883f457be1SHong Zhang       i = size-nleftover-1;
1893f457be1SHong Zhang       j = 0;
1903f457be1SHong Zhang       while (j < nsubcomm - nleftover){
1913f457be1SHong Zhang         send_rank[nsends++] = i;
1923f457be1SHong Zhang         i--; j++;
1933f457be1SHong Zhang       }
1943f457be1SHong Zhang     }
1953f457be1SHong Zhang 
1963f457be1SHong Zhang     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1973f457be1SHong Zhang       for (i=0; i<nleftover; i++){
1983f457be1SHong Zhang         recv_rank[nrecvs++] = size-nleftover+i;
1993f457be1SHong Zhang       }
2003f457be1SHong Zhang     }
2013f457be1SHong Zhang 
202*b3804887SHong Zhang     /* allocate sbuf_j, sbuf_a */
203*b3804887SHong Zhang     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
204*b3804887SHong Zhang     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2053f457be1SHong Zhang     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
206*b3804887SHong Zhang   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2073f457be1SHong Zhang 
208*b3804887SHong Zhang   /* copy mat's local entries into the buffers */
209*b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
210*b3804887SHong Zhang     rownz_max = 0;
2113f457be1SHong Zhang     rptr = sbuf_j;
2123f457be1SHong Zhang     cols = sbuf_j + rend-rstart + 1;
2133f457be1SHong Zhang     vals = sbuf_a;
2143f457be1SHong Zhang     rptr[0] = 0;
2153f457be1SHong Zhang     for (i=0; i<rend-rstart; i++){
2163f457be1SHong Zhang       row = i + rstart;
2173f457be1SHong Zhang       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2183f457be1SHong Zhang       ncols  = nzA + nzB;
2193f457be1SHong Zhang       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2203f457be1SHong Zhang       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2213f457be1SHong Zhang       /* load the column indices for this row into cols */
2223f457be1SHong Zhang       lwrite = 0;
2233f457be1SHong Zhang       for (l=0; l<nzB; l++) {
2243f457be1SHong Zhang         if ((ctmp = bmap[cworkB[l]]) < cstart){
2253f457be1SHong Zhang           vals[lwrite]   = aworkB[l];
2263f457be1SHong Zhang           cols[lwrite++] = ctmp;
2273f457be1SHong Zhang         }
2283f457be1SHong Zhang       }
2293f457be1SHong Zhang       for (l=0; l<nzA; l++){
2303f457be1SHong Zhang         vals[lwrite]   = aworkA[l];
2313f457be1SHong Zhang         cols[lwrite++] = cstart + cworkA[l];
2323f457be1SHong Zhang       }
2333f457be1SHong Zhang       for (l=0; l<nzB; l++) {
2343f457be1SHong Zhang         if ((ctmp = bmap[cworkB[l]]) >= cend){
2353f457be1SHong Zhang           vals[lwrite]   = aworkB[l];
2363f457be1SHong Zhang           cols[lwrite++] = ctmp;
2373f457be1SHong Zhang         }
2383f457be1SHong Zhang       }
2393f457be1SHong Zhang       vals += ncols;
2403f457be1SHong Zhang       cols += ncols;
2413f457be1SHong Zhang       rptr[i+1] = rptr[i] + ncols;
242*b3804887SHong Zhang       if (rownz_max < ncols) rownz_max = ncols;
2433f457be1SHong Zhang     }
2443f457be1SHong Zhang     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
245*b3804887SHong Zhang   } else { /* only copy matrix values into sbuf_a */
246*b3804887SHong Zhang     rptr = sbuf_j;
247*b3804887SHong Zhang     vals = sbuf_a;
248*b3804887SHong Zhang     rptr[0] = 0;
249*b3804887SHong Zhang     for (i=0; i<rend-rstart; i++){
250*b3804887SHong Zhang       row = i + rstart;
251*b3804887SHong Zhang       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
252*b3804887SHong Zhang       ncols  = nzA + nzB;
253*b3804887SHong Zhang       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
254*b3804887SHong Zhang       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
255*b3804887SHong Zhang       lwrite = 0;
256*b3804887SHong Zhang       for (l=0; l<nzB; l++) {
257*b3804887SHong Zhang         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
258*b3804887SHong Zhang       }
259*b3804887SHong Zhang       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
260*b3804887SHong Zhang       for (l=0; l<nzB; l++) {
261*b3804887SHong Zhang         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
262*b3804887SHong Zhang       }
263*b3804887SHong Zhang       vals += ncols;
264*b3804887SHong Zhang       rptr[i+1] = rptr[i] + ncols;
265*b3804887SHong Zhang     }
266*b3804887SHong Zhang   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2673f457be1SHong Zhang 
2683f457be1SHong Zhang   /* send nzlocal to others, and recv other's nzlocal */
2693f457be1SHong Zhang   /*--------------------------------------------------*/
270*b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
271*b3804887SHong Zhang     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
272*b3804887SHong Zhang     s_waits2 = s_waits3 + nsends;
273*b3804887SHong Zhang     s_waits1 = s_waits2 + nsends;
274*b3804887SHong Zhang     r_waits1 = s_waits1 + nsends;
2753f457be1SHong Zhang     r_waits2 = r_waits1 + nrecvs;
2763f457be1SHong Zhang     r_waits3 = r_waits2 + nrecvs;
277*b3804887SHong Zhang   } else {
278*b3804887SHong Zhang     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
279*b3804887SHong Zhang     r_waits3 = s_waits3 + nsends;
280*b3804887SHong Zhang   }
2813f457be1SHong Zhang 
282*b3804887SHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
283*b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
284*b3804887SHong Zhang     /* get new tags to keep the communication clean */
285*b3804887SHong Zhang     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
286*b3804887SHong Zhang     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
287*b3804887SHong Zhang     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
288*b3804887SHong Zhang     rbuf_nz = sbuf_nz + nsends;
2893f457be1SHong Zhang 
2903f457be1SHong Zhang     /* post receives of other's nzlocal */
2913f457be1SHong Zhang     for (i=0; i<nrecvs; i++){
2923f457be1SHong Zhang       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2933f457be1SHong Zhang     }
2943f457be1SHong Zhang     /* send nzlocal to others */
2953f457be1SHong Zhang     for (i=0; i<nsends; i++){
2963f457be1SHong Zhang       sbuf_nz[i] = nzlocal;
2973f457be1SHong Zhang       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2983f457be1SHong Zhang     }
2993f457be1SHong Zhang     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
3003f457be1SHong Zhang     count = nrecvs;
3013f457be1SHong Zhang     while (count) {
3023f457be1SHong Zhang       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
3033f457be1SHong Zhang       recv_rank[imdex] = recv_status.MPI_SOURCE;
304*b3804887SHong Zhang       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
3053f457be1SHong Zhang       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
3063f457be1SHong Zhang 
307*b3804887SHong Zhang       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
308*b3804887SHong Zhang       rbuf_nz[imdex] += i + 2;
3093f457be1SHong Zhang       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
3103f457be1SHong Zhang       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
3113f457be1SHong Zhang       count--;
3123f457be1SHong Zhang     }
3133f457be1SHong Zhang     /* wait on sends of nzlocal */
3143f457be1SHong Zhang     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
315*b3804887SHong Zhang     /* send mat->i,j to others, and recv from other's */
316*b3804887SHong Zhang     /*------------------------------------------------*/
317*b3804887SHong Zhang     for (i=0; i<nsends; i++){
318*b3804887SHong Zhang       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
319*b3804887SHong Zhang       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
320*b3804887SHong Zhang     }
321*b3804887SHong Zhang     /* wait on receives of mat->i,j */
322*b3804887SHong Zhang     /*------------------------------*/
323*b3804887SHong Zhang     count = nrecvs;
324*b3804887SHong Zhang     while (count) {
325*b3804887SHong Zhang       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
326*b3804887SHong Zhang       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
327*b3804887SHong Zhang       count--;
328*b3804887SHong Zhang     }
329*b3804887SHong Zhang     /* wait on sends of mat->i,j */
330*b3804887SHong Zhang     /*---------------------------*/
331*b3804887SHong Zhang     if (nsends) {
332*b3804887SHong Zhang       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
333*b3804887SHong Zhang     }
334*b3804887SHong Zhang   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
3353f457be1SHong Zhang 
336*b3804887SHong Zhang   /* post receives, send and receive mat->a */
337*b3804887SHong Zhang   /*----------------------------------------*/
338*b3804887SHong Zhang   for (imdex=0; imdex<nrecvs; imdex++) {
339*b3804887SHong Zhang     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
340*b3804887SHong Zhang   }
3413f457be1SHong Zhang   for (i=0; i<nsends; i++){
3423f457be1SHong Zhang     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
3433f457be1SHong Zhang   }
3443f457be1SHong Zhang   count = nrecvs;
3453f457be1SHong Zhang   while (count) {
3463f457be1SHong Zhang     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
3473f457be1SHong Zhang     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
3483f457be1SHong Zhang     count--;
3493f457be1SHong Zhang   }
3503f457be1SHong Zhang   if (nsends) {
3513f457be1SHong Zhang     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
3523f457be1SHong Zhang   }
353*b3804887SHong Zhang 
354*b3804887SHong Zhang   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
3553f457be1SHong Zhang 
3563f457be1SHong Zhang   /* create redundant matrix */
3573f457be1SHong Zhang   /*-------------------------*/
3580ae51fcdSHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
359*b3804887SHong Zhang     /* compute rownz_max for preallocation */
360*b3804887SHong Zhang     for (imdex=0; imdex<nrecvs; imdex++){
361*b3804887SHong Zhang       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
362*b3804887SHong Zhang       rptr = rbuf_j[imdex];
363*b3804887SHong Zhang       for (i=0; i<j; i++){
364*b3804887SHong Zhang         ncols = rptr[i+1] - rptr[i];
365*b3804887SHong Zhang         if (rownz_max < ncols) rownz_max = ncols;
366*b3804887SHong Zhang       }
367*b3804887SHong Zhang     }
368*b3804887SHong Zhang 
3693f457be1SHong Zhang     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
3703f457be1SHong Zhang     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
3713f457be1SHong Zhang     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
372*b3804887SHong Zhang     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
373*b3804887SHong Zhang     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
3740ae51fcdSHong Zhang   } else {
3750ae51fcdSHong Zhang     C = *matredundant;
3760ae51fcdSHong Zhang   }
377*b3804887SHong Zhang 
3783f457be1SHong Zhang   /* insert local matrix entries */
3793f457be1SHong Zhang   rptr = sbuf_j;
3803f457be1SHong Zhang   cols = sbuf_j + rend-rstart + 1;
3813f457be1SHong Zhang   vals = sbuf_a;
3823f457be1SHong Zhang   for (i=0; i<rend-rstart; i++){
3833f457be1SHong Zhang     row   = i + rstart;
3843f457be1SHong Zhang     ncols = rptr[i+1] - rptr[i];
3853f457be1SHong Zhang     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3863f457be1SHong Zhang     vals += ncols;
3873f457be1SHong Zhang     cols += ncols;
3883f457be1SHong Zhang   }
3893f457be1SHong Zhang   /* insert received matrix entries */
3903f457be1SHong Zhang   for (imdex=0; imdex<nrecvs; imdex++){
3913f457be1SHong Zhang     rstart = rowrange[recv_rank[imdex]];
3923f457be1SHong Zhang     rend   = rowrange[recv_rank[imdex]+1];
3933f457be1SHong Zhang     rptr = rbuf_j[imdex];
3943f457be1SHong Zhang     cols = rbuf_j[imdex] + rend-rstart + 1;
3953f457be1SHong Zhang     vals = rbuf_a[imdex];
3963f457be1SHong Zhang     for (i=0; i<rend-rstart; i++){
3973f457be1SHong Zhang       row   = i + rstart;
3983f457be1SHong Zhang       ncols = rptr[i+1] - rptr[i];
3993f457be1SHong Zhang       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4003f457be1SHong Zhang       vals += ncols;
4013f457be1SHong Zhang       cols += ncols;
4023f457be1SHong Zhang     }
4033f457be1SHong Zhang   }
4043f457be1SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4053f457be1SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4063f457be1SHong Zhang   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
4073f457be1SHong Zhang   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
4080ae51fcdSHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
409*b3804887SHong Zhang     PetscObjectContainer container;
4103f457be1SHong Zhang     *matredundant = C;
411*b3804887SHong Zhang     /* create a supporting struct and attach it to C for reuse */
412*b3804887SHong Zhang     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
413*b3804887SHong Zhang     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
414*b3804887SHong Zhang     ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr);
415*b3804887SHong Zhang     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
416*b3804887SHong Zhang     ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr);
4173f457be1SHong Zhang 
418*b3804887SHong Zhang     redund->nzlocal = nzlocal;
419*b3804887SHong Zhang     redund->nsends  = nsends;
420*b3804887SHong Zhang     redund->nrecvs  = nrecvs;
421*b3804887SHong Zhang     redund->send_rank = send_rank;
422*b3804887SHong Zhang     redund->sbuf_nz = sbuf_nz;
423*b3804887SHong Zhang     redund->sbuf_j  = sbuf_j;
424*b3804887SHong Zhang     redund->sbuf_a  = sbuf_a;
425*b3804887SHong Zhang     redund->rbuf_j  = rbuf_j;
426*b3804887SHong Zhang     redund->rbuf_a  = rbuf_a;
427*b3804887SHong Zhang 
428*b3804887SHong Zhang     redund->MatDestroy = C->ops->destroy;
429*b3804887SHong Zhang     C->ops->destroy    = MatDestroy_MatRedundant;
4303f457be1SHong Zhang   }
4313f457be1SHong Zhang   PetscFunctionReturn(0);
4323f457be1SHong Zhang }
4333f457be1SHong Zhang 
4344b9ad928SBarry Smith #undef __FUNCT__
4354b9ad928SBarry Smith #define __FUNCT__ "PCSetUp_Redundant"
4366849ba73SBarry Smith static PetscErrorCode PCSetUp_Redundant(PC pc)
4374b9ad928SBarry Smith {
4384b9ad928SBarry Smith   PC_Redundant   *red  = (PC_Redundant*)pc->data;
439dfbe8321SBarry Smith   PetscErrorCode ierr;
4403f457be1SHong Zhang   PetscInt       mstart,mend,mlocal,m;
44113f74950SBarry Smith   PetscMPIInt    size;
4424b9ad928SBarry Smith   IS             isl;
4434b9ad928SBarry Smith   MatReuse       reuse = MAT_INITIAL_MATRIX;
4444b9ad928SBarry Smith   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
4454b9ad928SBarry Smith   MPI_Comm       comm;
44623ce1328SBarry Smith   Vec            vec;
4474b9ad928SBarry Smith 
4483f457be1SHong Zhang   PetscInt    mlocal_sub;
4493f457be1SHong Zhang   PetscMPIInt subsize,subrank;
4503f457be1SHong Zhang   PetscInt    rstart_sub,rend_sub,mloc_sub;
4513f457be1SHong Zhang 
4524b9ad928SBarry Smith   PetscFunctionBegin;
4533f457be1SHong Zhang   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
45423ce1328SBarry Smith   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
4554b9ad928SBarry Smith   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
45623ce1328SBarry Smith   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
4574b9ad928SBarry Smith   if (!pc->setupcalled) {
4583f457be1SHong Zhang     /* create working vectors xsub/ysub and xdup/ydup */
45923ce1328SBarry Smith     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
4603f457be1SHong Zhang     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
4614b9ad928SBarry Smith 
4623f457be1SHong Zhang     /* get local size of xsub/ysub */
4633f457be1SHong Zhang     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
4643f457be1SHong Zhang     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
4653f457be1SHong Zhang     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
4663f457be1SHong Zhang     if (subrank+1 < subsize){
4673f457be1SHong Zhang       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
4683f457be1SHong Zhang     } else {
4693f457be1SHong Zhang       rend_sub = m;
4703f457be1SHong Zhang     }
4713f457be1SHong Zhang     mloc_sub = rend_sub - rstart_sub;
4723f457be1SHong Zhang     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
4733f457be1SHong Zhang     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
4743f457be1SHong Zhang     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
4753f457be1SHong Zhang 
4763f457be1SHong Zhang     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
4773f457be1SHong Zhang        Note: we use communicator dupcomm, not pc->comm! */
4783f457be1SHong Zhang     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
4793f457be1SHong Zhang     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
4803f457be1SHong Zhang 
4813f457be1SHong Zhang     /* create vec scatters */
4823f457be1SHong Zhang     if (!red->scatterin){
4833f457be1SHong Zhang       IS       is1,is2;
4843f457be1SHong Zhang       PetscInt *idx1,*idx2,i,j,k;
4853f457be1SHong Zhang       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
4863f457be1SHong Zhang       idx2 = idx1 + red->nsubcomm*mlocal;
4873f457be1SHong Zhang       j = 0;
4883f457be1SHong Zhang       for (k=0; k<red->nsubcomm; k++){
4893f457be1SHong Zhang         for (i=mstart; i<mend; i++){
4903f457be1SHong Zhang           idx1[j]   = i;
4913f457be1SHong Zhang           idx2[j++] = i + m*k;
4923f457be1SHong Zhang         }
4933f457be1SHong Zhang       }
4943f457be1SHong Zhang       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
4953f457be1SHong Zhang       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
4963f457be1SHong Zhang       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
4973f457be1SHong Zhang       ierr = ISDestroy(is1);CHKERRQ(ierr);
4983f457be1SHong Zhang       ierr = ISDestroy(is2);CHKERRQ(ierr);
4993f457be1SHong Zhang 
5003f457be1SHong Zhang       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
5013f457be1SHong Zhang       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
5023f457be1SHong Zhang       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
5033f457be1SHong Zhang       ierr = ISDestroy(is1);CHKERRQ(ierr);
5043f457be1SHong Zhang       ierr = ISDestroy(is2);CHKERRQ(ierr);
5053f457be1SHong Zhang       ierr = PetscFree(idx1);CHKERRQ(ierr);
5064b9ad928SBarry Smith     }
5074b9ad928SBarry Smith   }
50823ce1328SBarry Smith   ierr = VecDestroy(vec);CHKERRQ(ierr);
5094b9ad928SBarry Smith 
5104b9ad928SBarry Smith   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
5113f457be1SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5124b9ad928SBarry Smith   if (size == 1) {
5134b9ad928SBarry Smith     red->useparallelmat = PETSC_FALSE;
5144b9ad928SBarry Smith   }
5154b9ad928SBarry Smith 
5164b9ad928SBarry Smith   if (red->useparallelmat) {
5174b9ad928SBarry Smith     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
5184b9ad928SBarry Smith       /* destroy old matrices */
5194b9ad928SBarry Smith       if (red->pmats) {
520*b3804887SHong Zhang         //ierr = MatDestroyMatrices(1,&red->pmats);CHKERRQ(ierr);
521*b3804887SHong Zhang         ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
5224b9ad928SBarry Smith       }
5234b9ad928SBarry Smith     } else if (pc->setupcalled == 1) {
5244b9ad928SBarry Smith       reuse = MAT_REUSE_MATRIX;
5254b9ad928SBarry Smith       str   = SAME_NONZERO_PATTERN;
5264b9ad928SBarry Smith     }
5274b9ad928SBarry Smith 
5283f457be1SHong Zhang     /* grab the parallel matrix and put it into processors of a subcomminicator */
529f664ae05SHong Zhang     /*--------------------------------------------------------------------------*/
530f664ae05SHong Zhang     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
531*b3804887SHong Zhang     ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr);
5323f457be1SHong Zhang     /* tell PC of the subcommunicator its operators */
533*b3804887SHong Zhang     ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr);
5344b9ad928SBarry Smith   } else {
5354b9ad928SBarry Smith     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
5364b9ad928SBarry Smith   }
5374b9ad928SBarry Smith   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
5384b9ad928SBarry Smith   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
5394b9ad928SBarry Smith   PetscFunctionReturn(0);
5404b9ad928SBarry Smith }
5414b9ad928SBarry Smith 
5424b9ad928SBarry Smith #undef __FUNCT__
5434b9ad928SBarry Smith #define __FUNCT__ "PCApply_Redundant"
5446849ba73SBarry Smith static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
5454b9ad928SBarry Smith {
5464b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
547dfbe8321SBarry Smith   PetscErrorCode ierr;
5483f457be1SHong Zhang   PetscScalar    *array;
5494b9ad928SBarry Smith 
5504b9ad928SBarry Smith   PetscFunctionBegin;
5513f457be1SHong Zhang   /* scatter x to xdup */
5523f457be1SHong Zhang   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
5533f457be1SHong Zhang   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
5543f457be1SHong Zhang 
5553f457be1SHong Zhang   /* place xdup's local array into xsub */
5563f457be1SHong Zhang   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
5573f457be1SHong Zhang   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
5584b9ad928SBarry Smith 
5594b9ad928SBarry Smith   /* apply preconditioner on each processor */
5603f457be1SHong Zhang   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
5613f457be1SHong Zhang   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
5623f457be1SHong Zhang   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
5634b9ad928SBarry Smith 
5643f457be1SHong Zhang   /* place ysub's local array into ydup */
5653f457be1SHong Zhang   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
5663f457be1SHong Zhang   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
5673f457be1SHong Zhang 
5683f457be1SHong Zhang   /* scatter ydup to y */
5693f457be1SHong Zhang   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
5703f457be1SHong Zhang   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
5713f457be1SHong Zhang   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
5723f457be1SHong Zhang   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
5734b9ad928SBarry Smith   PetscFunctionReturn(0);
5744b9ad928SBarry Smith }
5754b9ad928SBarry Smith 
5764b9ad928SBarry Smith #undef __FUNCT__
5774b9ad928SBarry Smith #define __FUNCT__ "PCDestroy_Redundant"
5786849ba73SBarry Smith static PetscErrorCode PCDestroy_Redundant(PC pc)
5794b9ad928SBarry Smith {
5804b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
581dfbe8321SBarry Smith   PetscErrorCode ierr;
5824b9ad928SBarry Smith 
5834b9ad928SBarry Smith   PetscFunctionBegin;
5844b9ad928SBarry Smith   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
5854b9ad928SBarry Smith   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
5863f457be1SHong Zhang   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
5873f457be1SHong Zhang   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
5883f457be1SHong Zhang   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
5893f457be1SHong Zhang   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
590*b3804887SHong Zhang   if (red->pmats) {
591*b3804887SHong Zhang     ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
5923f457be1SHong Zhang   }
5933f457be1SHong Zhang 
594*b3804887SHong Zhang 
5954b9ad928SBarry Smith   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
5964b9ad928SBarry Smith   ierr = PetscFree(red);CHKERRQ(ierr);
5974b9ad928SBarry Smith   PetscFunctionReturn(0);
5984b9ad928SBarry Smith }
5994b9ad928SBarry Smith 
6004b9ad928SBarry Smith #undef __FUNCT__
6014b9ad928SBarry Smith #define __FUNCT__ "PCSetFromOptions_Redundant"
6026849ba73SBarry Smith static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
6034b9ad928SBarry Smith {
6044b9ad928SBarry Smith   PetscFunctionBegin;
6054b9ad928SBarry Smith   PetscFunctionReturn(0);
6064b9ad928SBarry Smith }
6074b9ad928SBarry Smith 
6084b9ad928SBarry Smith EXTERN_C_BEGIN
6094b9ad928SBarry Smith #undef __FUNCT__
6104b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter_Redundant"
611dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
6124b9ad928SBarry Smith {
6134b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
614dfbe8321SBarry Smith   PetscErrorCode ierr;
6154b9ad928SBarry Smith 
6164b9ad928SBarry Smith   PetscFunctionBegin;
6174b9ad928SBarry Smith   red->scatterin  = in;
6184b9ad928SBarry Smith   red->scatterout = out;
6194b9ad928SBarry Smith   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
6204b9ad928SBarry Smith   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
6214b9ad928SBarry Smith   PetscFunctionReturn(0);
6224b9ad928SBarry Smith }
6234b9ad928SBarry Smith EXTERN_C_END
6244b9ad928SBarry Smith 
6254b9ad928SBarry Smith #undef __FUNCT__
6264b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter"
6274b9ad928SBarry Smith /*@
6284b9ad928SBarry Smith    PCRedundantSetScatter - Sets the scatter used to copy values into the
6294b9ad928SBarry Smith      redundant local solve and the scatter to move them back into the global
6304b9ad928SBarry Smith      vector.
6314b9ad928SBarry Smith 
6324b9ad928SBarry Smith    Collective on PC
6334b9ad928SBarry Smith 
6344b9ad928SBarry Smith    Input Parameters:
6354b9ad928SBarry Smith +  pc - the preconditioner context
6364b9ad928SBarry Smith .  in - the scatter to move the values in
6374b9ad928SBarry Smith -  out - the scatter to move them out
6384b9ad928SBarry Smith 
6394b9ad928SBarry Smith    Level: advanced
6404b9ad928SBarry Smith 
6414b9ad928SBarry Smith .keywords: PC, redundant solve
6424b9ad928SBarry Smith @*/
643dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
6444b9ad928SBarry Smith {
645dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
6464b9ad928SBarry Smith 
6474b9ad928SBarry Smith   PetscFunctionBegin;
6484482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
6494482741eSBarry Smith   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
6504482741eSBarry Smith   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
6514b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
6524b9ad928SBarry Smith   if (f) {
6534b9ad928SBarry Smith     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
6544b9ad928SBarry Smith   }
6554b9ad928SBarry Smith   PetscFunctionReturn(0);
6564b9ad928SBarry Smith }
6574b9ad928SBarry Smith 
6584b9ad928SBarry Smith EXTERN_C_BEGIN
6594b9ad928SBarry Smith #undef __FUNCT__
6604b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC_Redundant"
661dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
6624b9ad928SBarry Smith {
6634b9ad928SBarry Smith   PC_Redundant *red = (PC_Redundant*)pc->data;
6644b9ad928SBarry Smith 
6654b9ad928SBarry Smith   PetscFunctionBegin;
6664b9ad928SBarry Smith   *innerpc = red->pc;
6674b9ad928SBarry Smith   PetscFunctionReturn(0);
6684b9ad928SBarry Smith }
6694b9ad928SBarry Smith EXTERN_C_END
6704b9ad928SBarry Smith 
6714b9ad928SBarry Smith #undef __FUNCT__
6724b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC"
6734b9ad928SBarry Smith /*@
6744b9ad928SBarry Smith    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
6754b9ad928SBarry Smith 
6764b9ad928SBarry Smith    Not Collective
6774b9ad928SBarry Smith 
6784b9ad928SBarry Smith    Input Parameter:
6794b9ad928SBarry Smith .  pc - the preconditioner context
6804b9ad928SBarry Smith 
6814b9ad928SBarry Smith    Output Parameter:
6824b9ad928SBarry Smith .  innerpc - the sequential PC
6834b9ad928SBarry Smith 
6844b9ad928SBarry Smith    Level: advanced
6854b9ad928SBarry Smith 
6864b9ad928SBarry Smith .keywords: PC, redundant solve
6874b9ad928SBarry Smith @*/
688dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
6894b9ad928SBarry Smith {
690dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,PC*);
6914b9ad928SBarry Smith 
6924b9ad928SBarry Smith   PetscFunctionBegin;
6934482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
6944482741eSBarry Smith   PetscValidPointer(innerpc,2);
6954b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
6964b9ad928SBarry Smith   if (f) {
6974b9ad928SBarry Smith     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
6984b9ad928SBarry Smith   }
6994b9ad928SBarry Smith   PetscFunctionReturn(0);
7004b9ad928SBarry Smith }
7014b9ad928SBarry Smith 
7024b9ad928SBarry Smith EXTERN_C_BEGIN
7034b9ad928SBarry Smith #undef __FUNCT__
7044b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators_Redundant"
705dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
7064b9ad928SBarry Smith {
7074b9ad928SBarry Smith   PC_Redundant *red = (PC_Redundant*)pc->data;
7084b9ad928SBarry Smith 
7094b9ad928SBarry Smith   PetscFunctionBegin;
710*b3804887SHong Zhang   if (mat)  *mat  = red->pmats;
711*b3804887SHong Zhang   if (pmat) *pmat = red->pmats;
7124b9ad928SBarry Smith   PetscFunctionReturn(0);
7134b9ad928SBarry Smith }
7144b9ad928SBarry Smith EXTERN_C_END
7154b9ad928SBarry Smith 
7164b9ad928SBarry Smith #undef __FUNCT__
7174b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators"
7184b9ad928SBarry Smith /*@
7194b9ad928SBarry Smith    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
7204b9ad928SBarry Smith 
7214b9ad928SBarry Smith    Not Collective
7224b9ad928SBarry Smith 
7234b9ad928SBarry Smith    Input Parameter:
7244b9ad928SBarry Smith .  pc - the preconditioner context
7254b9ad928SBarry Smith 
7264b9ad928SBarry Smith    Output Parameters:
7274b9ad928SBarry Smith +  mat - the matrix
7284b9ad928SBarry Smith -  pmat - the (possibly different) preconditioner matrix
7294b9ad928SBarry Smith 
7304b9ad928SBarry Smith    Level: advanced
7314b9ad928SBarry Smith 
7324b9ad928SBarry Smith .keywords: PC, redundant solve
7334b9ad928SBarry Smith @*/
734dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
7354b9ad928SBarry Smith {
736dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
7374b9ad928SBarry Smith 
7384b9ad928SBarry Smith   PetscFunctionBegin;
7394482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
7404482741eSBarry Smith   if (mat)  PetscValidPointer(mat,2);
7414482741eSBarry Smith   if (pmat) PetscValidPointer(pmat,3);
7424b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
7434b9ad928SBarry Smith   if (f) {
7444b9ad928SBarry Smith     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
7454b9ad928SBarry Smith   }
7464b9ad928SBarry Smith   PetscFunctionReturn(0);
7474b9ad928SBarry Smith }
7484b9ad928SBarry Smith 
7494b9ad928SBarry Smith /* -------------------------------------------------------------------------------------*/
75037a17b4dSBarry Smith /*MC
75137a17b4dSBarry Smith      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor
75237a17b4dSBarry Smith 
75337a17b4dSBarry Smith 
75437a17b4dSBarry Smith      Options for the redundant preconditioners can be set with -redundant_pc_xxx
75537a17b4dSBarry Smith 
75637a17b4dSBarry Smith    Level: intermediate
75737a17b4dSBarry Smith 
75837a17b4dSBarry Smith 
75937a17b4dSBarry Smith .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
76037a17b4dSBarry Smith            PCRedundantGetPC(), PCRedundantGetOperators()
76137a17b4dSBarry Smith 
76237a17b4dSBarry Smith M*/
76337a17b4dSBarry Smith 
7644b9ad928SBarry Smith EXTERN_C_BEGIN
7654b9ad928SBarry Smith #undef __FUNCT__
7664b9ad928SBarry Smith #define __FUNCT__ "PCCreate_Redundant"
767dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
7684b9ad928SBarry Smith {
769dfbe8321SBarry Smith   PetscErrorCode ierr;
7704b9ad928SBarry Smith   PC_Redundant   *red;
771e060cb09SBarry Smith   const char     *prefix;
7724b9ad928SBarry Smith 
7733f457be1SHong Zhang   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
7743f457be1SHong Zhang   PetscMPIInt    rank,size,subrank,*subsize;
7753f457be1SHong Zhang   MPI_Comm       subcomm;
7763f457be1SHong Zhang   PetscMPIInt    duprank;
7773f457be1SHong Zhang   PetscMPIInt    rank_dup,size_dup;
7783f457be1SHong Zhang   MPI_Comm       dupcomm;
7793f457be1SHong Zhang 
7804b9ad928SBarry Smith   PetscFunctionBegin;
7814b9ad928SBarry Smith   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
78252e6d16bSBarry Smith   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
7834b9ad928SBarry Smith   red->useparallelmat   = PETSC_TRUE;
7844b9ad928SBarry Smith 
7853f457be1SHong Zhang   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
7863f457be1SHong Zhang   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
7873f457be1SHong Zhang   nsubcomm = size;
7883f457be1SHong Zhang   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
7893f457be1SHong Zhang   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
7903f457be1SHong Zhang 
7913f457be1SHong Zhang   /* get size of each subcommunicators */
7923f457be1SHong Zhang   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
7933f457be1SHong Zhang   np_subcomm = size/nsubcomm;
7943f457be1SHong Zhang   nleftover  = size - nsubcomm*np_subcomm;
7953f457be1SHong Zhang   for (i=0; i<nsubcomm; i++){
7963f457be1SHong Zhang     subsize[i] = np_subcomm;
7973f457be1SHong Zhang     if (i<nleftover) subsize[i]++;
7983f457be1SHong Zhang   }
7993f457be1SHong Zhang 
8003f457be1SHong Zhang   /* find color for this proc */
8013f457be1SHong Zhang   color   = rank%nsubcomm;
8023f457be1SHong Zhang   subrank = rank/nsubcomm;
8033f457be1SHong Zhang 
8043f457be1SHong Zhang   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
8053f457be1SHong Zhang   red->subcomm  = subcomm;
8063f457be1SHong Zhang   red->color    = color;
8073f457be1SHong Zhang   red->nsubcomm = nsubcomm;
8083f457be1SHong Zhang 
8093f457be1SHong Zhang   j = 0; duprank = 0;
8103f457be1SHong Zhang   for (i=0; i<nsubcomm; i++){
8113f457be1SHong Zhang     if (j == color){
8123f457be1SHong Zhang       duprank += subrank;
8133f457be1SHong Zhang       break;
8143f457be1SHong Zhang     }
8153f457be1SHong Zhang     duprank += subsize[i]; j++;
8163f457be1SHong Zhang   }
8173f457be1SHong Zhang   /*
8183f457be1SHong Zhang   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] color %d, subrank %d, duprank %d\n",rank,color,subrank,duprank);
8193f457be1SHong Zhang   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
8203f457be1SHong Zhang   */
8213f457be1SHong Zhang 
8223f457be1SHong Zhang   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
8233f457be1SHong Zhang   ierr = MPI_Comm_rank(dupcomm,&rank_dup);CHKERRQ(ierr);
8243f457be1SHong Zhang   ierr = MPI_Comm_size(dupcomm,&size_dup);CHKERRQ(ierr);
8253f457be1SHong Zhang   /*
8263f457be1SHong Zhang   ierr = PetscSynchronizedPrintf(pc->comm, "[%d] duprank %d\n",rank,duprank);
8273f457be1SHong Zhang   ierr = PetscSynchronizedFlush(pc->comm);CHKERRQ(ierr);
8283f457be1SHong Zhang   */
8293f457be1SHong Zhang   red->dupcomm = dupcomm;
8303f457be1SHong Zhang   ierr = PetscFree(subsize);CHKERRQ(ierr);
8313f457be1SHong Zhang 
8324b9ad928SBarry Smith   /* create the sequential PC that each processor has copy of */
8333f457be1SHong Zhang   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
8344b9ad928SBarry Smith   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
8354b9ad928SBarry Smith   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
8364b9ad928SBarry Smith   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
8374b9ad928SBarry Smith   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
8384b9ad928SBarry Smith 
8394b9ad928SBarry Smith   pc->ops->apply             = PCApply_Redundant;
8404b9ad928SBarry Smith   pc->ops->applytranspose    = 0;
8414b9ad928SBarry Smith   pc->ops->setup             = PCSetUp_Redundant;
8424b9ad928SBarry Smith   pc->ops->destroy           = PCDestroy_Redundant;
8434b9ad928SBarry Smith   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
8444b9ad928SBarry Smith   pc->ops->setuponblocks     = 0;
8454b9ad928SBarry Smith   pc->ops->view              = PCView_Redundant;
8464b9ad928SBarry Smith   pc->ops->applyrichardson   = 0;
8474b9ad928SBarry Smith 
8484b9ad928SBarry Smith   pc->data     = (void*)red;
8494b9ad928SBarry Smith 
8504b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
8514b9ad928SBarry Smith                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
8524b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
8534b9ad928SBarry Smith                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
8544b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
8554b9ad928SBarry Smith                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
8564b9ad928SBarry Smith 
8574b9ad928SBarry Smith   PetscFunctionReturn(0);
8584b9ad928SBarry Smith }
8594b9ad928SBarry Smith EXTERN_C_END
860