xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision a98ce0f41cecbcf583ea61eee7d4bfcfd656d38d)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
2dba47a55SKris Buschelman 
34b9ad928SBarry Smith /*
  This file defines a "solve the problem redundantly on each subgroup of processors" preconditioner.
54b9ad928SBarry Smith */
66356e834SBarry Smith #include "private/pcimpl.h"     /*I "petscpc.h" I*/
74b9ad928SBarry Smith #include "petscksp.h"
84b9ad928SBarry Smith 
/* Private context for the redundant preconditioner: each subcommunicator solves a full copy of the system */
typedef struct {
  PC         pc;                   /* actual preconditioner used on each processor */
  Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
  Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
  Mat        pmats;                /* matrix and optional preconditioner matrix belong to a subcommunicator */
  VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
  PetscTruth useparallelmat;       /* if true, gather the parallel pmat into a redundant copy on each subcommunicator (see PCSetUp_Redundant) */
  MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
  MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
                                      that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
  PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
  PetscInt   color;                /* color of processors in a subcommunicator */
} PC_Redundant;
224b9ad928SBarry Smith 
234b9ad928SBarry Smith #undef __FUNCT__
244b9ad928SBarry Smith #define __FUNCT__ "PCView_Redundant"
256849ba73SBarry Smith static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
264b9ad928SBarry Smith {
274b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
28dfbe8321SBarry Smith   PetscErrorCode ierr;
2913f74950SBarry Smith   PetscMPIInt    rank;
3032077d6dSBarry Smith   PetscTruth     iascii,isstring;
31a47c9f9aSHong Zhang   PetscViewer    sviewer,subviewer;
32a47c9f9aSHong Zhang   PetscInt       color = red->color;
334b9ad928SBarry Smith 
344b9ad928SBarry Smith   PetscFunctionBegin;
354b9ad928SBarry Smith   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
3632077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
374b9ad928SBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
3832077d6dSBarry Smith   if (iascii) {
39*a98ce0f4SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: First PC (color=0) follows\n");CHKERRQ(ierr);
40*a98ce0f4SHong Zhang     ierr = PetscViewerGetSubcomm(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr);
41*a98ce0f4SHong Zhang     if (!color) { /* only view first redundant pc */
424b9ad928SBarry Smith       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
43a47c9f9aSHong Zhang       ierr = PCView(red->pc,subviewer);CHKERRQ(ierr);
444b9ad928SBarry Smith       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
454b9ad928SBarry Smith     }
46*a98ce0f4SHong Zhang     ierr = PetscViewerRestoreSubcomm(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr);
47*a98ce0f4SHong Zhang   } else if (isstring) { /* not test it yet! */
484b9ad928SBarry Smith     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
494b9ad928SBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
504b9ad928SBarry Smith     if (!rank) {
514b9ad928SBarry Smith       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
524b9ad928SBarry Smith     }
534b9ad928SBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
544b9ad928SBarry Smith   } else {
5579a5c55eSBarry Smith     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
564b9ad928SBarry Smith   }
574b9ad928SBarry Smith   PetscFunctionReturn(0);
584b9ad928SBarry Smith }
594b9ad928SBarry Smith 
60b9147fbbSdalcinl #include "include/private/matimpl.h"        /*I "petscmat.h" I*/
613f457be1SHong Zhang #include "private/vecimpl.h"
623f457be1SHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
633f457be1SHong Zhang #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
643f457be1SHong Zhang 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;                 /* local nonzero count; number of sends/receives in the gather */
  PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;  /* destination ranks; send nz counts; send/recv buffers for row ptrs + column indices */
  PetscScalar    *sbuf_a,**rbuf_a;                      /* send/recv buffers for matrix values */
  PetscErrorCode (*MatDestroy)(Mat);                    /* the matrix's original destroy routine, restored by MatDestroy_MatRedundant */
} Mat_Redundant;
71b3804887SHong Zhang 
72b3804887SHong Zhang #undef __FUNCT__
73776b82aeSLisandro Dalcin #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
74776b82aeSLisandro Dalcin PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
75b3804887SHong Zhang {
76b3804887SHong Zhang   PetscErrorCode       ierr;
77b3804887SHong Zhang   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
78b3804887SHong Zhang   PetscInt             i;
79b3804887SHong Zhang 
80b3804887SHong Zhang   PetscFunctionBegin;
81b3804887SHong Zhang   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
82b3804887SHong Zhang   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
83b3804887SHong Zhang   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
84b3804887SHong Zhang   for (i=0; i<redund->nrecvs; i++){
85b3804887SHong Zhang     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
86b3804887SHong Zhang     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
87b3804887SHong Zhang   }
88b3804887SHong Zhang   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
89b3804887SHong Zhang   ierr = PetscFree(redund);CHKERRQ(ierr);
90b3804887SHong Zhang   PetscFunctionReturn(0);
91b3804887SHong Zhang }
92b3804887SHong Zhang 
93b3804887SHong Zhang #undef __FUNCT__
94b3804887SHong Zhang #define __FUNCT__ "MatDestroy_MatRedundant"
95b3804887SHong Zhang PetscErrorCode MatDestroy_MatRedundant(Mat A)
96b3804887SHong Zhang {
97b3804887SHong Zhang   PetscErrorCode       ierr;
98776b82aeSLisandro Dalcin   PetscContainer container;
99b3804887SHong Zhang   Mat_Redundant        *redund=PETSC_NULL;
100b3804887SHong Zhang 
101b3804887SHong Zhang   PetscFunctionBegin;
102b3804887SHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
103b3804887SHong Zhang   if (container) {
104776b82aeSLisandro Dalcin     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
105b3804887SHong Zhang   } else {
106b3804887SHong Zhang     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
107b3804887SHong Zhang   }
108b3804887SHong Zhang   A->ops->destroy = redund->MatDestroy;
109b3804887SHong Zhang   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
110b3804887SHong Zhang   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
111776b82aeSLisandro Dalcin   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
112b3804887SHong Zhang   PetscFunctionReturn(0);
113b3804887SHong Zhang }
114b3804887SHong Zhang 
1153f457be1SHong Zhang #undef __FUNCT__
1163f457be1SHong Zhang #define __FUNCT__ "MatGetRedundantMatrix"
117f664ae05SHong Zhang PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1183f457be1SHong Zhang {
119b3804887SHong Zhang   PetscMPIInt    rank,size;
1203f457be1SHong Zhang   MPI_Comm       comm=mat->comm;
1213f457be1SHong Zhang   PetscErrorCode ierr;
1227c7c70f1SSatish Balay   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1233365e775SHong Zhang   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
124b3804887SHong Zhang   PetscInt       *rowrange=mat->rmap.range;
1253f457be1SHong Zhang   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
126b3804887SHong Zhang   Mat            A=aij->A,B=aij->B,C=*matredundant;
1273f457be1SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1283f457be1SHong Zhang   PetscScalar    *sbuf_a;
129b3804887SHong Zhang   PetscInt       nzlocal=a->nz+b->nz;
130b3804887SHong Zhang   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1313f457be1SHong Zhang   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
132b3804887SHong Zhang   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1333f457be1SHong Zhang   PetscScalar    *vals,*aworkA,*aworkB;
1343f457be1SHong Zhang   PetscMPIInt    tag1,tag2,tag3,imdex;
1353365e775SHong Zhang   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1363365e775SHong Zhang                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1373f457be1SHong Zhang   MPI_Status     recv_status,*send_status;
1383365e775SHong Zhang   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1393365e775SHong Zhang   PetscInt       **rbuf_j=PETSC_NULL;
1403365e775SHong Zhang   PetscScalar    **rbuf_a=PETSC_NULL;
141b3804887SHong Zhang   Mat_Redundant  *redund=PETSC_NULL;
142776b82aeSLisandro Dalcin   PetscContainer container;
1433f457be1SHong Zhang 
1443f457be1SHong Zhang   PetscFunctionBegin;
1453f457be1SHong Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1463f457be1SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
147b3804887SHong Zhang 
148b3804887SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
149b3804887SHong Zhang     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
150b3804887SHong Zhang     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
151b3804887SHong Zhang     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
152b3804887SHong Zhang     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
153b3804887SHong Zhang     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
154b3804887SHong Zhang     if (container) {
155776b82aeSLisandro Dalcin       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
156b3804887SHong Zhang     } else {
157b3804887SHong Zhang       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
158b3804887SHong Zhang     }
159b3804887SHong Zhang     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
160b3804887SHong Zhang 
161b3804887SHong Zhang     nsends    = redund->nsends;
162b3804887SHong Zhang     nrecvs    = redund->nrecvs;
163b3804887SHong Zhang     send_rank = redund->send_rank; recv_rank = send_rank + size;
164b3804887SHong Zhang     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
165b3804887SHong Zhang     sbuf_j    = redund->sbuf_j;
166b3804887SHong Zhang     sbuf_a    = redund->sbuf_a;
167b3804887SHong Zhang     rbuf_j    = redund->rbuf_j;
168b3804887SHong Zhang     rbuf_a    = redund->rbuf_a;
169b3804887SHong Zhang   }
170b3804887SHong Zhang 
171b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
172b3804887SHong Zhang     PetscMPIInt  subrank,subsize;
173b3804887SHong Zhang     PetscInt     nleftover,np_subcomm;
174b3804887SHong Zhang     /* get the destination processors' id send_rank, nsends and nrecvs */
1753f457be1SHong Zhang     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1763f457be1SHong Zhang     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1773f457be1SHong Zhang     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1783f457be1SHong Zhang     recv_rank = send_rank + size;
1793f457be1SHong Zhang     np_subcomm = size/nsubcomm;
1803f457be1SHong Zhang     nleftover  = size - nsubcomm*np_subcomm;
1813f457be1SHong Zhang     nsends = 0; nrecvs = 0;
1823f457be1SHong Zhang     for (i=0; i<size; i++){ /* i=rank*/
1833f457be1SHong Zhang       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1843f457be1SHong Zhang         send_rank[nsends] = i; nsends++;
1853f457be1SHong Zhang         recv_rank[nrecvs++] = i;
1863f457be1SHong Zhang       }
1873f457be1SHong Zhang     }
1883f457be1SHong Zhang     if (rank >= size - nleftover){/* this proc is a leftover processor */
1893f457be1SHong Zhang       i = size-nleftover-1;
1903f457be1SHong Zhang       j = 0;
1913f457be1SHong Zhang       while (j < nsubcomm - nleftover){
1923f457be1SHong Zhang         send_rank[nsends++] = i;
1933f457be1SHong Zhang         i--; j++;
1943f457be1SHong Zhang       }
1953f457be1SHong Zhang     }
1963f457be1SHong Zhang 
1973f457be1SHong Zhang     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1983f457be1SHong Zhang       for (i=0; i<nleftover; i++){
1993f457be1SHong Zhang         recv_rank[nrecvs++] = size-nleftover+i;
2003f457be1SHong Zhang       }
2013f457be1SHong Zhang     }
2023f457be1SHong Zhang 
203b3804887SHong Zhang     /* allocate sbuf_j, sbuf_a */
204b3804887SHong Zhang     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
205b3804887SHong Zhang     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2063f457be1SHong Zhang     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
207b3804887SHong Zhang   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2083f457be1SHong Zhang 
209b3804887SHong Zhang   /* copy mat's local entries into the buffers */
210b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
211b3804887SHong Zhang     rownz_max = 0;
2123f457be1SHong Zhang     rptr = sbuf_j;
2133f457be1SHong Zhang     cols = sbuf_j + rend-rstart + 1;
2143f457be1SHong Zhang     vals = sbuf_a;
2153f457be1SHong Zhang     rptr[0] = 0;
2163f457be1SHong Zhang     for (i=0; i<rend-rstart; i++){
2173f457be1SHong Zhang       row = i + rstart;
2183f457be1SHong Zhang       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2193f457be1SHong Zhang       ncols  = nzA + nzB;
2203f457be1SHong Zhang       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2213f457be1SHong Zhang       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2223f457be1SHong Zhang       /* load the column indices for this row into cols */
2233f457be1SHong Zhang       lwrite = 0;
2243f457be1SHong Zhang       for (l=0; l<nzB; l++) {
2253f457be1SHong Zhang         if ((ctmp = bmap[cworkB[l]]) < cstart){
2263f457be1SHong Zhang           vals[lwrite]   = aworkB[l];
2273f457be1SHong Zhang           cols[lwrite++] = ctmp;
2283f457be1SHong Zhang         }
2293f457be1SHong Zhang       }
2303f457be1SHong Zhang       for (l=0; l<nzA; l++){
2313f457be1SHong Zhang         vals[lwrite]   = aworkA[l];
2323f457be1SHong Zhang         cols[lwrite++] = cstart + cworkA[l];
2333f457be1SHong Zhang       }
2343f457be1SHong Zhang       for (l=0; l<nzB; l++) {
2353f457be1SHong Zhang         if ((ctmp = bmap[cworkB[l]]) >= cend){
2363f457be1SHong Zhang           vals[lwrite]   = aworkB[l];
2373f457be1SHong Zhang           cols[lwrite++] = ctmp;
2383f457be1SHong Zhang         }
2393f457be1SHong Zhang       }
2403f457be1SHong Zhang       vals += ncols;
2413f457be1SHong Zhang       cols += ncols;
2423f457be1SHong Zhang       rptr[i+1] = rptr[i] + ncols;
243b3804887SHong Zhang       if (rownz_max < ncols) rownz_max = ncols;
2443f457be1SHong Zhang     }
2453f457be1SHong Zhang     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
246b3804887SHong Zhang   } else { /* only copy matrix values into sbuf_a */
247b3804887SHong Zhang     rptr = sbuf_j;
248b3804887SHong Zhang     vals = sbuf_a;
249b3804887SHong Zhang     rptr[0] = 0;
250b3804887SHong Zhang     for (i=0; i<rend-rstart; i++){
251b3804887SHong Zhang       row = i + rstart;
252b3804887SHong Zhang       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
253b3804887SHong Zhang       ncols  = nzA + nzB;
254b3804887SHong Zhang       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
255b3804887SHong Zhang       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
256b3804887SHong Zhang       lwrite = 0;
257b3804887SHong Zhang       for (l=0; l<nzB; l++) {
258b3804887SHong Zhang         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
259b3804887SHong Zhang       }
260b3804887SHong Zhang       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
261b3804887SHong Zhang       for (l=0; l<nzB; l++) {
262b3804887SHong Zhang         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
263b3804887SHong Zhang       }
264b3804887SHong Zhang       vals += ncols;
265b3804887SHong Zhang       rptr[i+1] = rptr[i] + ncols;
266b3804887SHong Zhang     }
267b3804887SHong Zhang   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2683f457be1SHong Zhang 
2693f457be1SHong Zhang   /* send nzlocal to others, and recv other's nzlocal */
2703f457be1SHong Zhang   /*--------------------------------------------------*/
271b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
272b3804887SHong Zhang     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
273b3804887SHong Zhang     s_waits2 = s_waits3 + nsends;
274b3804887SHong Zhang     s_waits1 = s_waits2 + nsends;
275b3804887SHong Zhang     r_waits1 = s_waits1 + nsends;
2763f457be1SHong Zhang     r_waits2 = r_waits1 + nrecvs;
2773f457be1SHong Zhang     r_waits3 = r_waits2 + nrecvs;
278b3804887SHong Zhang   } else {
279b3804887SHong Zhang     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
280b3804887SHong Zhang     r_waits3 = s_waits3 + nsends;
281b3804887SHong Zhang   }
2823f457be1SHong Zhang 
283b3804887SHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
284b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
285b3804887SHong Zhang     /* get new tags to keep the communication clean */
286b3804887SHong Zhang     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
287b3804887SHong Zhang     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
288b3804887SHong Zhang     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
289b3804887SHong Zhang     rbuf_nz = sbuf_nz + nsends;
2903f457be1SHong Zhang 
2913f457be1SHong Zhang     /* post receives of other's nzlocal */
2923f457be1SHong Zhang     for (i=0; i<nrecvs; i++){
2933f457be1SHong Zhang       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2943f457be1SHong Zhang     }
2953f457be1SHong Zhang     /* send nzlocal to others */
2963f457be1SHong Zhang     for (i=0; i<nsends; i++){
2973f457be1SHong Zhang       sbuf_nz[i] = nzlocal;
2983f457be1SHong Zhang       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2993f457be1SHong Zhang     }
3003f457be1SHong Zhang     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
3013f457be1SHong Zhang     count = nrecvs;
3023f457be1SHong Zhang     while (count) {
3033f457be1SHong Zhang       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
3043f457be1SHong Zhang       recv_rank[imdex] = recv_status.MPI_SOURCE;
305b3804887SHong Zhang       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
3063f457be1SHong Zhang       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
3073f457be1SHong Zhang 
308b3804887SHong Zhang       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
309b3804887SHong Zhang       rbuf_nz[imdex] += i + 2;
3103f457be1SHong Zhang       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
3113f457be1SHong Zhang       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
3123f457be1SHong Zhang       count--;
3133f457be1SHong Zhang     }
3143f457be1SHong Zhang     /* wait on sends of nzlocal */
3153f457be1SHong Zhang     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
316b3804887SHong Zhang     /* send mat->i,j to others, and recv from other's */
317b3804887SHong Zhang     /*------------------------------------------------*/
318b3804887SHong Zhang     for (i=0; i<nsends; i++){
319b3804887SHong Zhang       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
320b3804887SHong Zhang       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
321b3804887SHong Zhang     }
322b3804887SHong Zhang     /* wait on receives of mat->i,j */
323b3804887SHong Zhang     /*------------------------------*/
324b3804887SHong Zhang     count = nrecvs;
325b3804887SHong Zhang     while (count) {
326b3804887SHong Zhang       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
327b3804887SHong Zhang       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
328b3804887SHong Zhang       count--;
329b3804887SHong Zhang     }
330b3804887SHong Zhang     /* wait on sends of mat->i,j */
331b3804887SHong Zhang     /*---------------------------*/
332b3804887SHong Zhang     if (nsends) {
333b3804887SHong Zhang       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
334b3804887SHong Zhang     }
335b3804887SHong Zhang   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
3363f457be1SHong Zhang 
337b3804887SHong Zhang   /* post receives, send and receive mat->a */
338b3804887SHong Zhang   /*----------------------------------------*/
339b3804887SHong Zhang   for (imdex=0; imdex<nrecvs; imdex++) {
340b3804887SHong Zhang     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
341b3804887SHong Zhang   }
3423f457be1SHong Zhang   for (i=0; i<nsends; i++){
3433f457be1SHong Zhang     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
3443f457be1SHong Zhang   }
3453f457be1SHong Zhang   count = nrecvs;
3463f457be1SHong Zhang   while (count) {
3473f457be1SHong Zhang     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
3483f457be1SHong Zhang     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
3493f457be1SHong Zhang     count--;
3503f457be1SHong Zhang   }
3513f457be1SHong Zhang   if (nsends) {
3523f457be1SHong Zhang     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
3533f457be1SHong Zhang   }
354b3804887SHong Zhang 
355b3804887SHong Zhang   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
3563f457be1SHong Zhang 
3573f457be1SHong Zhang   /* create redundant matrix */
3583f457be1SHong Zhang   /*-------------------------*/
3590ae51fcdSHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
360b3804887SHong Zhang     /* compute rownz_max for preallocation */
361b3804887SHong Zhang     for (imdex=0; imdex<nrecvs; imdex++){
362b3804887SHong Zhang       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
363b3804887SHong Zhang       rptr = rbuf_j[imdex];
364b3804887SHong Zhang       for (i=0; i<j; i++){
365b3804887SHong Zhang         ncols = rptr[i+1] - rptr[i];
366b3804887SHong Zhang         if (rownz_max < ncols) rownz_max = ncols;
367b3804887SHong Zhang       }
368b3804887SHong Zhang     }
369b3804887SHong Zhang 
3703f457be1SHong Zhang     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
3713f457be1SHong Zhang     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
3723f457be1SHong Zhang     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
373b3804887SHong Zhang     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
374b3804887SHong Zhang     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
3750ae51fcdSHong Zhang   } else {
3760ae51fcdSHong Zhang     C = *matredundant;
3770ae51fcdSHong Zhang   }
378b3804887SHong Zhang 
3793f457be1SHong Zhang   /* insert local matrix entries */
3803f457be1SHong Zhang   rptr = sbuf_j;
3813f457be1SHong Zhang   cols = sbuf_j + rend-rstart + 1;
3823f457be1SHong Zhang   vals = sbuf_a;
3833f457be1SHong Zhang   for (i=0; i<rend-rstart; i++){
3843f457be1SHong Zhang     row   = i + rstart;
3853f457be1SHong Zhang     ncols = rptr[i+1] - rptr[i];
3863f457be1SHong Zhang     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3873f457be1SHong Zhang     vals += ncols;
3883f457be1SHong Zhang     cols += ncols;
3893f457be1SHong Zhang   }
3903f457be1SHong Zhang   /* insert received matrix entries */
3913f457be1SHong Zhang   for (imdex=0; imdex<nrecvs; imdex++){
3923f457be1SHong Zhang     rstart = rowrange[recv_rank[imdex]];
3933f457be1SHong Zhang     rend   = rowrange[recv_rank[imdex]+1];
3943f457be1SHong Zhang     rptr = rbuf_j[imdex];
3953f457be1SHong Zhang     cols = rbuf_j[imdex] + rend-rstart + 1;
3963f457be1SHong Zhang     vals = rbuf_a[imdex];
3973f457be1SHong Zhang     for (i=0; i<rend-rstart; i++){
3983f457be1SHong Zhang       row   = i + rstart;
3993f457be1SHong Zhang       ncols = rptr[i+1] - rptr[i];
4003f457be1SHong Zhang       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
4013f457be1SHong Zhang       vals += ncols;
4023f457be1SHong Zhang       cols += ncols;
4033f457be1SHong Zhang     }
4043f457be1SHong Zhang   }
4053f457be1SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4063f457be1SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4073f457be1SHong Zhang   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
4083f457be1SHong Zhang   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
4090ae51fcdSHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
410776b82aeSLisandro Dalcin     PetscContainer container;
4113f457be1SHong Zhang     *matredundant = C;
412b3804887SHong Zhang     /* create a supporting struct and attach it to C for reuse */
413b3804887SHong Zhang     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
414776b82aeSLisandro Dalcin     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
415776b82aeSLisandro Dalcin     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
416b3804887SHong Zhang     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
417776b82aeSLisandro Dalcin     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
4183f457be1SHong Zhang 
419b3804887SHong Zhang     redund->nzlocal = nzlocal;
420b3804887SHong Zhang     redund->nsends  = nsends;
421b3804887SHong Zhang     redund->nrecvs  = nrecvs;
422b3804887SHong Zhang     redund->send_rank = send_rank;
423b3804887SHong Zhang     redund->sbuf_nz = sbuf_nz;
424b3804887SHong Zhang     redund->sbuf_j  = sbuf_j;
425b3804887SHong Zhang     redund->sbuf_a  = sbuf_a;
426b3804887SHong Zhang     redund->rbuf_j  = rbuf_j;
427b3804887SHong Zhang     redund->rbuf_a  = rbuf_a;
428b3804887SHong Zhang 
429b3804887SHong Zhang     redund->MatDestroy = C->ops->destroy;
430b3804887SHong Zhang     C->ops->destroy    = MatDestroy_MatRedundant;
4313f457be1SHong Zhang   }
4323f457be1SHong Zhang   PetscFunctionReturn(0);
4333f457be1SHong Zhang }
4343f457be1SHong Zhang 
4354b9ad928SBarry Smith #undef __FUNCT__
4364b9ad928SBarry Smith #define __FUNCT__ "PCSetUp_Redundant"
4376849ba73SBarry Smith static PetscErrorCode PCSetUp_Redundant(PC pc)
4384b9ad928SBarry Smith {
4394b9ad928SBarry Smith   PC_Redundant   *red  = (PC_Redundant*)pc->data;
440dfbe8321SBarry Smith   PetscErrorCode ierr;
4413f457be1SHong Zhang   PetscInt       mstart,mend,mlocal,m;
44213f74950SBarry Smith   PetscMPIInt    size;
4434b9ad928SBarry Smith   MatReuse       reuse = MAT_INITIAL_MATRIX;
4444b9ad928SBarry Smith   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
4454b9ad928SBarry Smith   MPI_Comm       comm;
44623ce1328SBarry Smith   Vec            vec;
4474b9ad928SBarry Smith 
4483f457be1SHong Zhang   PetscInt       mlocal_sub;
4493f457be1SHong Zhang   PetscMPIInt    subsize,subrank;
4503f457be1SHong Zhang   PetscInt       rstart_sub,rend_sub,mloc_sub;
4513f457be1SHong Zhang 
4524b9ad928SBarry Smith   PetscFunctionBegin;
4533f457be1SHong Zhang   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
45423ce1328SBarry Smith   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
4554b9ad928SBarry Smith   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
45623ce1328SBarry Smith   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
4574b9ad928SBarry Smith   if (!pc->setupcalled) {
4583f457be1SHong Zhang     /* create working vectors xsub/ysub and xdup/ydup */
45923ce1328SBarry Smith     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
4603f457be1SHong Zhang     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
4614b9ad928SBarry Smith 
4623f457be1SHong Zhang     /* get local size of xsub/ysub */
4633f457be1SHong Zhang     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
4643f457be1SHong Zhang     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
4653f457be1SHong Zhang     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
4663f457be1SHong Zhang     if (subrank+1 < subsize){
4673f457be1SHong Zhang       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
4683f457be1SHong Zhang     } else {
4693f457be1SHong Zhang       rend_sub = m;
4703f457be1SHong Zhang     }
4713f457be1SHong Zhang     mloc_sub = rend_sub - rstart_sub;
4723f457be1SHong Zhang     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
4733f457be1SHong Zhang     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
4743f457be1SHong Zhang     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
4753f457be1SHong Zhang 
4763f457be1SHong Zhang     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
4773f457be1SHong Zhang        Note: we use communicator dupcomm, not pc->comm! */
4783f457be1SHong Zhang     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
4793f457be1SHong Zhang     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
4803f457be1SHong Zhang 
4813f457be1SHong Zhang     /* create vec scatters */
4823f457be1SHong Zhang     if (!red->scatterin){
4833f457be1SHong Zhang       IS       is1,is2;
4843f457be1SHong Zhang       PetscInt *idx1,*idx2,i,j,k;
4853f457be1SHong Zhang       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
4863f457be1SHong Zhang       idx2 = idx1 + red->nsubcomm*mlocal;
4873f457be1SHong Zhang       j = 0;
4883f457be1SHong Zhang       for (k=0; k<red->nsubcomm; k++){
4893f457be1SHong Zhang         for (i=mstart; i<mend; i++){
4903f457be1SHong Zhang           idx1[j]   = i;
4913f457be1SHong Zhang           idx2[j++] = i + m*k;
4923f457be1SHong Zhang         }
4933f457be1SHong Zhang       }
4943f457be1SHong Zhang       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
4953f457be1SHong Zhang       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
4963f457be1SHong Zhang       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
4973f457be1SHong Zhang       ierr = ISDestroy(is1);CHKERRQ(ierr);
4983f457be1SHong Zhang       ierr = ISDestroy(is2);CHKERRQ(ierr);
4993f457be1SHong Zhang 
5003f457be1SHong Zhang       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
5013f457be1SHong Zhang       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
5023f457be1SHong Zhang       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
5033f457be1SHong Zhang       ierr = ISDestroy(is1);CHKERRQ(ierr);
5043f457be1SHong Zhang       ierr = ISDestroy(is2);CHKERRQ(ierr);
5053f457be1SHong Zhang       ierr = PetscFree(idx1);CHKERRQ(ierr);
5064b9ad928SBarry Smith     }
5074b9ad928SBarry Smith   }
50823ce1328SBarry Smith   ierr = VecDestroy(vec);CHKERRQ(ierr);
5094b9ad928SBarry Smith 
5104b9ad928SBarry Smith   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
5113f457be1SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5124b9ad928SBarry Smith   if (size == 1) {
5134b9ad928SBarry Smith     red->useparallelmat = PETSC_FALSE;
5144b9ad928SBarry Smith   }
5154b9ad928SBarry Smith 
5164b9ad928SBarry Smith   if (red->useparallelmat) {
5174b9ad928SBarry Smith     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
5184b9ad928SBarry Smith       /* destroy old matrices */
5194b9ad928SBarry Smith       if (red->pmats) {
520b3804887SHong Zhang         ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
5214b9ad928SBarry Smith       }
5224b9ad928SBarry Smith     } else if (pc->setupcalled == 1) {
5234b9ad928SBarry Smith       reuse = MAT_REUSE_MATRIX;
5244b9ad928SBarry Smith       str   = SAME_NONZERO_PATTERN;
5254b9ad928SBarry Smith     }
5264b9ad928SBarry Smith 
5273f457be1SHong Zhang     /* grab the parallel matrix and put it into processors of a subcomminicator */
528f664ae05SHong Zhang     /*--------------------------------------------------------------------------*/
529f664ae05SHong Zhang     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
530b3804887SHong Zhang     ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr);
5313f457be1SHong Zhang     /* tell PC of the subcommunicator its operators */
532b3804887SHong Zhang     ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr);
5334b9ad928SBarry Smith   } else {
5344b9ad928SBarry Smith     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
5354b9ad928SBarry Smith   }
5364b9ad928SBarry Smith   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
5374b9ad928SBarry Smith   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
5384b9ad928SBarry Smith   PetscFunctionReturn(0);
5394b9ad928SBarry Smith }
5404b9ad928SBarry Smith 
#undef __FUNCT__
#define __FUNCT__ "PCApply_Redundant"
/*
   PCApply_Redundant - Applies the redundant preconditioner.

   The global vector x is scattered into xdup (which lives on dupcomm and is
   laid out so each subcommunicator's piece is contiguous); xdup's local
   storage is aliased into xsub so the inner PC on the subcommunicator can
   read it; the inner solve writes ysub, whose storage is aliased into ydup
   and scattered back to the global vector y.

   Note: the VecPlaceArray/VecResetArray pairs below must bracket the uses of
   xsub/ydup exactly as written — xsub and ydup were created with NULL arrays
   in PCSetUp_Redundant and are only valid while an array is placed.
*/
static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
{
  PC_Redundant   *red = (PC_Redundant*)pc->data;
  PetscErrorCode ierr;
  PetscScalar    *array;

  PetscFunctionBegin;
  /* scatter x to xdup */
  ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
  ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);

  /* place xdup's local array into xsub (no data copy) */
  ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);

  /* apply preconditioner on each processor */
  ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
  ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
  ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);

  /* place ysub's local array into ydup (no data copy) */
  ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);

  /* scatter ydup to y */
  ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
  ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
  ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
  ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5744b9ad928SBarry Smith 
5754b9ad928SBarry Smith #undef __FUNCT__
5764b9ad928SBarry Smith #define __FUNCT__ "PCDestroy_Redundant"
5776849ba73SBarry Smith static PetscErrorCode PCDestroy_Redundant(PC pc)
5784b9ad928SBarry Smith {
5794b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
580dfbe8321SBarry Smith   PetscErrorCode ierr;
5814b9ad928SBarry Smith 
5824b9ad928SBarry Smith   PetscFunctionBegin;
5834b9ad928SBarry Smith   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
5844b9ad928SBarry Smith   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
5853f457be1SHong Zhang   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
5863f457be1SHong Zhang   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
5873f457be1SHong Zhang   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
5883f457be1SHong Zhang   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
589b3804887SHong Zhang   if (red->pmats) {
590b3804887SHong Zhang     ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
5913f457be1SHong Zhang   }
5923f457be1SHong Zhang 
593b3804887SHong Zhang 
5944b9ad928SBarry Smith   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
5954b9ad928SBarry Smith   ierr = PetscFree(red);CHKERRQ(ierr);
5964b9ad928SBarry Smith   PetscFunctionReturn(0);
5974b9ad928SBarry Smith }
5984b9ad928SBarry Smith 
5994b9ad928SBarry Smith #undef __FUNCT__
6004b9ad928SBarry Smith #define __FUNCT__ "PCSetFromOptions_Redundant"
6016849ba73SBarry Smith static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
6024b9ad928SBarry Smith {
603*a98ce0f4SHong Zhang   PetscErrorCode ierr;
604*a98ce0f4SHong Zhang   PC_Redundant   *red = (PC_Redundant*)pc->data;
605*a98ce0f4SHong Zhang 
6064b9ad928SBarry Smith   PetscFunctionBegin;
607*a98ce0f4SHong Zhang   ierr = PetscOptionsHead("Redundant options");CHKERRQ(ierr);
608*a98ce0f4SHong Zhang   ierr = PetscOptionsInt("-pc_redundant_number_comm","Number of subcommunicators","PCRedundantSetNumComm",red->nsubcomm,&red->nsubcomm,0);CHKERRQ(ierr);
609*a98ce0f4SHong Zhang   ierr = PetscOptionsTail();CHKERRQ(ierr);
6104b9ad928SBarry Smith   PetscFunctionReturn(0);
6114b9ad928SBarry Smith }
6124b9ad928SBarry Smith 
6134b9ad928SBarry Smith EXTERN_C_BEGIN
6144b9ad928SBarry Smith #undef __FUNCT__
6154b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter_Redundant"
616dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
6174b9ad928SBarry Smith {
6184b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
619dfbe8321SBarry Smith   PetscErrorCode ierr;
6204b9ad928SBarry Smith 
6214b9ad928SBarry Smith   PetscFunctionBegin;
6224b9ad928SBarry Smith   red->scatterin  = in;
6234b9ad928SBarry Smith   red->scatterout = out;
6244b9ad928SBarry Smith   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
6254b9ad928SBarry Smith   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
6264b9ad928SBarry Smith   PetscFunctionReturn(0);
6274b9ad928SBarry Smith }
6284b9ad928SBarry Smith EXTERN_C_END
6294b9ad928SBarry Smith 
6304b9ad928SBarry Smith #undef __FUNCT__
6314b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter"
6324b9ad928SBarry Smith /*@
6334b9ad928SBarry Smith    PCRedundantSetScatter - Sets the scatter used to copy values into the
6344b9ad928SBarry Smith      redundant local solve and the scatter to move them back into the global
6354b9ad928SBarry Smith      vector.
6364b9ad928SBarry Smith 
6374b9ad928SBarry Smith    Collective on PC
6384b9ad928SBarry Smith 
6394b9ad928SBarry Smith    Input Parameters:
6404b9ad928SBarry Smith +  pc - the preconditioner context
6414b9ad928SBarry Smith .  in - the scatter to move the values in
6424b9ad928SBarry Smith -  out - the scatter to move them out
6434b9ad928SBarry Smith 
6444b9ad928SBarry Smith    Level: advanced
6454b9ad928SBarry Smith 
6464b9ad928SBarry Smith .keywords: PC, redundant solve
6474b9ad928SBarry Smith @*/
648dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
6494b9ad928SBarry Smith {
650dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
6514b9ad928SBarry Smith 
6524b9ad928SBarry Smith   PetscFunctionBegin;
6534482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
6544482741eSBarry Smith   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
6554482741eSBarry Smith   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
6564b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
6574b9ad928SBarry Smith   if (f) {
6584b9ad928SBarry Smith     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
6594b9ad928SBarry Smith   }
6604b9ad928SBarry Smith   PetscFunctionReturn(0);
6614b9ad928SBarry Smith }
6624b9ad928SBarry Smith 
6634b9ad928SBarry Smith EXTERN_C_BEGIN
6644b9ad928SBarry Smith #undef __FUNCT__
6654b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC_Redundant"
666dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
6674b9ad928SBarry Smith {
6684b9ad928SBarry Smith   PC_Redundant *red = (PC_Redundant*)pc->data;
6694b9ad928SBarry Smith 
6704b9ad928SBarry Smith   PetscFunctionBegin;
6714b9ad928SBarry Smith   *innerpc = red->pc;
6724b9ad928SBarry Smith   PetscFunctionReturn(0);
6734b9ad928SBarry Smith }
6744b9ad928SBarry Smith EXTERN_C_END
6754b9ad928SBarry Smith 
6764b9ad928SBarry Smith #undef __FUNCT__
6774b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC"
6784b9ad928SBarry Smith /*@
6794b9ad928SBarry Smith    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
6804b9ad928SBarry Smith 
6814b9ad928SBarry Smith    Not Collective
6824b9ad928SBarry Smith 
6834b9ad928SBarry Smith    Input Parameter:
6844b9ad928SBarry Smith .  pc - the preconditioner context
6854b9ad928SBarry Smith 
6864b9ad928SBarry Smith    Output Parameter:
6874b9ad928SBarry Smith .  innerpc - the sequential PC
6884b9ad928SBarry Smith 
6894b9ad928SBarry Smith    Level: advanced
6904b9ad928SBarry Smith 
6914b9ad928SBarry Smith .keywords: PC, redundant solve
6924b9ad928SBarry Smith @*/
693dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
6944b9ad928SBarry Smith {
695dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,PC*);
6964b9ad928SBarry Smith 
6974b9ad928SBarry Smith   PetscFunctionBegin;
6984482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
6994482741eSBarry Smith   PetscValidPointer(innerpc,2);
7004b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
7014b9ad928SBarry Smith   if (f) {
7024b9ad928SBarry Smith     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
7034b9ad928SBarry Smith   }
7044b9ad928SBarry Smith   PetscFunctionReturn(0);
7054b9ad928SBarry Smith }
7064b9ad928SBarry Smith 
7074b9ad928SBarry Smith EXTERN_C_BEGIN
7084b9ad928SBarry Smith #undef __FUNCT__
7094b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators_Redundant"
710dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
7114b9ad928SBarry Smith {
7124b9ad928SBarry Smith   PC_Redundant *red = (PC_Redundant*)pc->data;
7134b9ad928SBarry Smith 
7144b9ad928SBarry Smith   PetscFunctionBegin;
715b3804887SHong Zhang   if (mat)  *mat  = red->pmats;
716b3804887SHong Zhang   if (pmat) *pmat = red->pmats;
7174b9ad928SBarry Smith   PetscFunctionReturn(0);
7184b9ad928SBarry Smith }
7194b9ad928SBarry Smith EXTERN_C_END
7204b9ad928SBarry Smith 
7214b9ad928SBarry Smith #undef __FUNCT__
7224b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators"
7234b9ad928SBarry Smith /*@
7244b9ad928SBarry Smith    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
7254b9ad928SBarry Smith 
7264b9ad928SBarry Smith    Not Collective
7274b9ad928SBarry Smith 
7284b9ad928SBarry Smith    Input Parameter:
7294b9ad928SBarry Smith .  pc - the preconditioner context
7304b9ad928SBarry Smith 
7314b9ad928SBarry Smith    Output Parameters:
7324b9ad928SBarry Smith +  mat - the matrix
7334b9ad928SBarry Smith -  pmat - the (possibly different) preconditioner matrix
7344b9ad928SBarry Smith 
7354b9ad928SBarry Smith    Level: advanced
7364b9ad928SBarry Smith 
7374b9ad928SBarry Smith .keywords: PC, redundant solve
7384b9ad928SBarry Smith @*/
739dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
7404b9ad928SBarry Smith {
741dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
7424b9ad928SBarry Smith 
7434b9ad928SBarry Smith   PetscFunctionBegin;
7444482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
7454482741eSBarry Smith   if (mat)  PetscValidPointer(mat,2);
7464482741eSBarry Smith   if (pmat) PetscValidPointer(pmat,3);
7474b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
7484b9ad928SBarry Smith   if (f) {
7494b9ad928SBarry Smith     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
7504b9ad928SBarry Smith   }
7514b9ad928SBarry Smith   PetscFunctionReturn(0);
7524b9ad928SBarry Smith }
7534b9ad928SBarry Smith 
7544b9ad928SBarry Smith /* -------------------------------------------------------------------------------------*/
75537a17b4dSBarry Smith /*MC
756*a98ce0f4SHong Zhang      PCREDUNDANT - Runs a preconditioner for the entire problem on subgroups of processors
75737a17b4dSBarry Smith 
75837a17b4dSBarry Smith 
     Options for the redundant preconditioner can be set using the -redundant_pc_xxx prefix
76037a17b4dSBarry Smith 
76109391456SBarry Smith   Options Database:
76209391456SBarry Smith .  -pc_redundant_number_comm - number of sub communicators to use
76309391456SBarry Smith 
76437a17b4dSBarry Smith    Level: intermediate
76537a17b4dSBarry Smith 
76637a17b4dSBarry Smith 
76737a17b4dSBarry Smith .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
76837a17b4dSBarry Smith            PCRedundantGetPC(), PCRedundantGetOperators()
76937a17b4dSBarry Smith 
77037a17b4dSBarry Smith M*/
77137a17b4dSBarry Smith 
7724b9ad928SBarry Smith EXTERN_C_BEGIN
7734b9ad928SBarry Smith #undef __FUNCT__
7744b9ad928SBarry Smith #define __FUNCT__ "PCCreate_Redundant"
775dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
7764b9ad928SBarry Smith {
777dfbe8321SBarry Smith   PetscErrorCode ierr;
7784b9ad928SBarry Smith   PC_Redundant   *red;
779e060cb09SBarry Smith   const char     *prefix;
7803f457be1SHong Zhang   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
781f6341431SHong Zhang   PetscMPIInt    rank,size,subrank,*subsize,duprank;
782f6341431SHong Zhang   MPI_Comm       subcomm=0,dupcomm=0;
7833f457be1SHong Zhang 
7844b9ad928SBarry Smith   PetscFunctionBegin;
7854b9ad928SBarry Smith   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
78652e6d16bSBarry Smith   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
7874b9ad928SBarry Smith   red->useparallelmat   = PETSC_TRUE;
7884b9ad928SBarry Smith 
7893f457be1SHong Zhang   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
7903f457be1SHong Zhang   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
7913f457be1SHong Zhang   nsubcomm = size;
79209391456SBarry Smith   ierr = PetscOptionsGetInt(PETSC_NULL,"-pc_redundant_number_comm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
7933f457be1SHong Zhang   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
7943f457be1SHong Zhang 
795f6341431SHong Zhang   /*--------------------------------------------------------------------------------------------------
796f6341431SHong Zhang     To avoid data scattering from subcomm back to original comm, we create subcomm by iteratively taking a
797f6341431SHong Zhang     processe into a subcomm.
798f6341431SHong Zhang     An example: size=4, nsubcomm=3
799f6341431SHong Zhang      pc->comm:
800f6341431SHong Zhang       rank:     [0]  [1]  [2]  [3]
801f6341431SHong Zhang       color:     0    1    2    0
802f6341431SHong Zhang 
803f6341431SHong Zhang      subcomm:
804f6341431SHong Zhang       subrank:  [0]  [0]  [0]  [1]
805f6341431SHong Zhang 
806f6341431SHong Zhang      dupcomm:
807f6341431SHong Zhang       duprank:  [0]  [2]  [3]  [1]
808f6341431SHong Zhang 
809f6341431SHong Zhang      Here, subcomm[color = 0] has subsize=2, owns process [0] and [3]
810f6341431SHong Zhang            subcomm[color = 1] has subsize=1, owns process [1]
811f6341431SHong Zhang            subcomm[color = 2] has subsize=1, owns process [2]
812f6341431SHong Zhang           dupcomm has same number of processes as pc->comm, and its duprank maps
813f6341431SHong Zhang           processes in subcomm contiguously into a 1d array:
814f6341431SHong Zhang             duprank: [0] [1]      [2]         [3]
815f6341431SHong Zhang             rank:    [0] [3]      [1]         [2]
816f6341431SHong Zhang                     subcomm[0] subcomm[1]  subcomm[2]
817f6341431SHong Zhang    ----------------------------------------------------------------------------------------*/
818f6341431SHong Zhang 
8193f457be1SHong Zhang   /* get size of each subcommunicators */
8203f457be1SHong Zhang   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
8213f457be1SHong Zhang   np_subcomm = size/nsubcomm;
8223f457be1SHong Zhang   nleftover  = size - nsubcomm*np_subcomm;
8233f457be1SHong Zhang   for (i=0; i<nsubcomm; i++){
8243f457be1SHong Zhang     subsize[i] = np_subcomm;
8253f457be1SHong Zhang     if (i<nleftover) subsize[i]++;
8263f457be1SHong Zhang   }
8273f457be1SHong Zhang 
8283f457be1SHong Zhang   /* find color for this proc */
8293f457be1SHong Zhang   color   = rank%nsubcomm;
8303f457be1SHong Zhang   subrank = rank/nsubcomm;
8313f457be1SHong Zhang 
8323f457be1SHong Zhang   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
8333f457be1SHong Zhang   red->subcomm  = subcomm;
8343f457be1SHong Zhang   red->color    = color;
8353f457be1SHong Zhang   red->nsubcomm = nsubcomm;
8363f457be1SHong Zhang 
8373f457be1SHong Zhang   j = 0; duprank = 0;
8383f457be1SHong Zhang   for (i=0; i<nsubcomm; i++){
8393f457be1SHong Zhang     if (j == color){
8403f457be1SHong Zhang       duprank += subrank;
8413f457be1SHong Zhang       break;
8423f457be1SHong Zhang     }
8433f457be1SHong Zhang     duprank += subsize[i]; j++;
8443f457be1SHong Zhang   }
8453f457be1SHong Zhang 
846f6341431SHong Zhang   /* create dupcomm with same size as comm, but its rank, duprank, maps subcomm's contiguously into dupcomm */
8473f457be1SHong Zhang   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
8483f457be1SHong Zhang   red->dupcomm = dupcomm;
8493f457be1SHong Zhang   ierr = PetscFree(subsize);CHKERRQ(ierr);
8503f457be1SHong Zhang 
8514b9ad928SBarry Smith   /* create the sequential PC that each processor has copy of */
8523f457be1SHong Zhang   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
8534b9ad928SBarry Smith   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
8544b9ad928SBarry Smith   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
8554b9ad928SBarry Smith   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
8564b9ad928SBarry Smith   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
8574b9ad928SBarry Smith 
8584b9ad928SBarry Smith   pc->ops->apply             = PCApply_Redundant;
8594b9ad928SBarry Smith   pc->ops->applytranspose    = 0;
8604b9ad928SBarry Smith   pc->ops->setup             = PCSetUp_Redundant;
8614b9ad928SBarry Smith   pc->ops->destroy           = PCDestroy_Redundant;
8624b9ad928SBarry Smith   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
8634b9ad928SBarry Smith   pc->ops->setuponblocks     = 0;
8644b9ad928SBarry Smith   pc->ops->view              = PCView_Redundant;
8654b9ad928SBarry Smith   pc->ops->applyrichardson   = 0;
8664b9ad928SBarry Smith 
8674b9ad928SBarry Smith   pc->data     = (void*)red;
8684b9ad928SBarry Smith 
8694b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
8704b9ad928SBarry Smith                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
8714b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
8724b9ad928SBarry Smith                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
8734b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
8744b9ad928SBarry Smith                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
8754b9ad928SBarry Smith 
8764b9ad928SBarry Smith   PetscFunctionReturn(0);
8774b9ad928SBarry Smith }
8784b9ad928SBarry Smith EXTERN_C_END
879