xref: /petsc/src/ksp/pc/impls/redundant/redundant.c (revision 3365e7758de60e0e120d35fbab85b245aa560968)
1dba47a55SKris Buschelman #define PETSCKSP_DLL
2dba47a55SKris Buschelman 
34b9ad928SBarry Smith /*
43f457be1SHong Zhang   This file defines a "solve the problem redundantly on each subgroup of processor" preconditioner.
54b9ad928SBarry Smith */
66356e834SBarry Smith #include "private/pcimpl.h"     /*I "petscpc.h" I*/
74b9ad928SBarry Smith #include "petscksp.h"
84b9ad928SBarry Smith 
94b9ad928SBarry Smith typedef struct {
104b9ad928SBarry Smith   PC         pc;                   /* actual preconditioner used on each processor */
113f457be1SHong Zhang   Vec        xsub,ysub;            /* vectors of a subcommunicator to hold parallel vectors of pc->comm */
123f457be1SHong Zhang   Vec        xdup,ydup;            /* parallel vector that congregates xsub or ysub facilitating vector scattering */
13b3804887SHong Zhang   Mat        pmats;                /* matrix and optional preconditioner matrix belong to a subcommunicator */
143f457be1SHong Zhang   VecScatter scatterin,scatterout; /* scatter used to move all values to each processor group (subcommunicator) */
154b9ad928SBarry Smith   PetscTruth useparallelmat;
163f457be1SHong Zhang   MPI_Comm   subcomm;              /* processors belong to a subcommunicator implement a PC in parallel */
173f457be1SHong Zhang   MPI_Comm   dupcomm;              /* processors belong to pc->comm with their rank remapped in the way
183f457be1SHong Zhang                                       that vector xdup/ydup has contiguous rank while appending xsub/ysub along their colors */
193f457be1SHong Zhang   PetscInt   nsubcomm;             /* num of subcommunicators, which equals the num of redundant matrix systems */
203f457be1SHong Zhang   PetscInt   color;                /* color of processors in a subcommunicator */
214b9ad928SBarry Smith } PC_Redundant;
224b9ad928SBarry Smith 
23a47c9f9aSHong Zhang #include "src/sys/viewer/impls/ascii/asciiimpl.h"  /*I     "petsc.h"   I*/
24a47c9f9aSHong Zhang #include "petscfix.h"
25a47c9f9aSHong Zhang #include <stdarg.h>
26a47c9f9aSHong Zhang 
27a47c9f9aSHong Zhang PetscErrorCode PETSC_DLLEXPORT PetscViewerRestoreSubcomm(PetscViewer viewer,PetscViewer *outviewer)
28a47c9f9aSHong Zhang {
29a47c9f9aSHong Zhang   PetscErrorCode ierr;
30a47c9f9aSHong Zhang   PetscMPIInt    size;
31a47c9f9aSHong Zhang 
32a47c9f9aSHong Zhang   PetscFunctionBegin;
33a47c9f9aSHong Zhang   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_COOKIE,1);
34a47c9f9aSHong Zhang 
35a47c9f9aSHong Zhang   ierr = MPI_Comm_size(viewer->comm,&size);CHKERRQ(ierr);
36a47c9f9aSHong Zhang   if (size == 1) {
37a47c9f9aSHong Zhang     ierr = PetscObjectDereference((PetscObject)viewer);CHKERRQ(ierr);
38a47c9f9aSHong Zhang     if (outviewer) *outviewer = 0;
39a47c9f9aSHong Zhang   } else if (viewer->ops->restoresingleton) {
40a47c9f9aSHong Zhang     ierr = (*viewer->ops->restoresingleton)(viewer,outviewer);CHKERRQ(ierr);
41a47c9f9aSHong Zhang   }
42a47c9f9aSHong Zhang   PetscFunctionReturn(0);
43a47c9f9aSHong Zhang }
44a47c9f9aSHong Zhang 
45a47c9f9aSHong Zhang #undef __FUNCT__
46a47c9f9aSHong Zhang #define __FUNCT__ "PetscViewerDestroy_ASCIIh"
47a47c9f9aSHong Zhang PetscErrorCode PetscViewerDestroy_ASCIIh(PetscViewer viewer)
48a47c9f9aSHong Zhang {
49a47c9f9aSHong Zhang   PetscMPIInt       rank;
50a47c9f9aSHong Zhang   PetscErrorCode    ierr;
51a47c9f9aSHong Zhang   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
52a47c9f9aSHong Zhang   PetscViewerLink   *vlink;
53a47c9f9aSHong Zhang   PetscTruth        flg;
54a47c9f9aSHong Zhang 
55a47c9f9aSHong Zhang   PetscFunctionBegin;
56a47c9f9aSHong Zhang   if (vascii->sviewer) {
57a47c9f9aSHong Zhang     SETERRQ(PETSC_ERR_ORDER,"ASCII PetscViewer destroyed before restoring singleton PetscViewer");
58a47c9f9aSHong Zhang   }
59a47c9f9aSHong Zhang   ierr = MPI_Comm_rank(viewer->comm,&rank);CHKERRQ(ierr);
60a47c9f9aSHong Zhang   if (!rank && vascii->fd != stderr && vascii->fd != stdout) {
61a47c9f9aSHong Zhang     if (vascii->fd) fclose(vascii->fd);
62a47c9f9aSHong Zhang     if (vascii->storecompressed) {
63a47c9f9aSHong Zhang       char par[PETSC_MAX_PATH_LEN],buf[PETSC_MAX_PATH_LEN];
64a47c9f9aSHong Zhang       FILE *fp;
65a47c9f9aSHong Zhang       ierr = PetscStrcpy(par,"gzip ");CHKERRQ(ierr);
66a47c9f9aSHong Zhang       ierr = PetscStrcat(par,vascii->filename);CHKERRQ(ierr);
67a47c9f9aSHong Zhang #if defined(PETSC_HAVE_POPEN)
68a47c9f9aSHong Zhang       ierr = PetscPOpen(PETSC_COMM_SELF,PETSC_NULL,par,"r",&fp);CHKERRQ(ierr);
69a47c9f9aSHong Zhang       if (fgets(buf,1024,fp)) {
70a47c9f9aSHong Zhang         SETERRQ2(PETSC_ERR_LIB,"Error from compression command %s\n%s",par,buf);
71a47c9f9aSHong Zhang       }
72a47c9f9aSHong Zhang       ierr = PetscPClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr);
73a47c9f9aSHong Zhang #else
74a47c9f9aSHong Zhang       SETERRQ(PETSC_ERR_SUP_SYS,"Cannot run external programs on this machine");
75a47c9f9aSHong Zhang #endif
76a47c9f9aSHong Zhang     }
77a47c9f9aSHong Zhang   }
78a47c9f9aSHong Zhang   ierr = PetscStrfree(vascii->filename);CHKERRQ(ierr);
79a47c9f9aSHong Zhang   ierr = PetscFree(vascii);CHKERRQ(ierr);
80a47c9f9aSHong Zhang 
81a47c9f9aSHong Zhang   /* remove the viewer from the list in the MPI Communicator */
82a47c9f9aSHong Zhang   if (Petsc_Viewer_keyval == MPI_KEYVAL_INVALID) {
83a47c9f9aSHong Zhang     ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelViewer,&Petsc_Viewer_keyval,(void*)0);CHKERRQ(ierr);
84a47c9f9aSHong Zhang   }
85a47c9f9aSHong Zhang 
86a47c9f9aSHong Zhang   ierr = MPI_Attr_get(viewer->comm,Petsc_Viewer_keyval,(void**)&vlink,(PetscMPIInt*)&flg);CHKERRQ(ierr);
87a47c9f9aSHong Zhang   if (flg) {
88a47c9f9aSHong Zhang     if (vlink && vlink->viewer == viewer) {
89a47c9f9aSHong Zhang       ierr = MPI_Attr_put(viewer->comm,Petsc_Viewer_keyval,vlink->next);CHKERRQ(ierr);
90a47c9f9aSHong Zhang       ierr = PetscFree(vlink);CHKERRQ(ierr);
91a47c9f9aSHong Zhang     } else {
92a47c9f9aSHong Zhang       while (vlink && vlink->next) {
93a47c9f9aSHong Zhang         if (vlink->next->viewer == viewer) {
94a47c9f9aSHong Zhang           PetscViewerLink *nv = vlink->next;
95a47c9f9aSHong Zhang           vlink->next = vlink->next->next;
96a47c9f9aSHong Zhang           ierr = PetscFree(nv);CHKERRQ(ierr);
97a47c9f9aSHong Zhang         }
98a47c9f9aSHong Zhang         vlink = vlink->next;
99a47c9f9aSHong Zhang       }
100a47c9f9aSHong Zhang     }
101a47c9f9aSHong Zhang   }
102a47c9f9aSHong Zhang   PetscFunctionReturn(0);
103a47c9f9aSHong Zhang }
104a47c9f9aSHong Zhang 
105a47c9f9aSHong Zhang #undef __FUNCT__
106a47c9f9aSHong Zhang #define __FUNCT__ "PetscViewerDestroy_ASCII_Subcomm"
107a47c9f9aSHong Zhang PetscErrorCode PetscViewerDestroy_ASCII_Subcomm(PetscViewer viewer)
108a47c9f9aSHong Zhang {
109a47c9f9aSHong Zhang   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
110a47c9f9aSHong Zhang   PetscErrorCode    ierr;
111a47c9f9aSHong Zhang   PetscFunctionBegin;
112a47c9f9aSHong Zhang   ierr = PetscViewerRestoreSubcomm(vascii->bviewer,&viewer);CHKERRQ(ierr);
113a47c9f9aSHong Zhang   PetscFunctionReturn(0);
114a47c9f9aSHong Zhang }
115a47c9f9aSHong Zhang 
116a47c9f9aSHong Zhang #undef __FUNCT__
117a47c9f9aSHong Zhang #define __FUNCT__ "PetscViewerFlush_ASCII_Subcomm_0"
118a47c9f9aSHong Zhang PetscErrorCode PetscViewerFlush_ASCII_Subcomm_0(PetscViewer viewer)
119a47c9f9aSHong Zhang {
120a47c9f9aSHong Zhang   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data;
121a47c9f9aSHong Zhang 
122a47c9f9aSHong Zhang   PetscFunctionBegin;
123a47c9f9aSHong Zhang   fflush(vascii->fd);
124a47c9f9aSHong Zhang   PetscFunctionReturn(0);
125a47c9f9aSHong Zhang }
126a47c9f9aSHong Zhang 
127a47c9f9aSHong Zhang #undef __FUNCT__
128a47c9f9aSHong Zhang #define __FUNCT__ "PetscViewerGetSubcomm_ASCII"
129a47c9f9aSHong Zhang PetscErrorCode PetscViewerGetSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer)
130a47c9f9aSHong Zhang {
131a47c9f9aSHong Zhang   PetscMPIInt       rank;
132a47c9f9aSHong Zhang   PetscErrorCode    ierr;
133a47c9f9aSHong Zhang   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)viewer->data,*ovascii;
134a47c9f9aSHong Zhang   const char        *name;
135a47c9f9aSHong Zhang 
136a47c9f9aSHong Zhang   PetscFunctionBegin;
137a47c9f9aSHong Zhang   if (vascii->sviewer) {
138a47c9f9aSHong Zhang     SETERRQ(PETSC_ERR_ORDER,"Subcomm already obtained from PetscViewer and not restored");
139a47c9f9aSHong Zhang   }
140a47c9f9aSHong Zhang   /* ierr         = PetscViewerCreate(PETSC_COMM_SELF,outviewer);CHKERRQ(ierr); */
141a47c9f9aSHong Zhang   ierr         = PetscViewerCreate(subcomm,outviewer);CHKERRQ(ierr);
142a47c9f9aSHong Zhang   ierr         = PetscViewerSetType(*outviewer,PETSC_VIEWER_ASCII);CHKERRQ(ierr);
143a47c9f9aSHong Zhang   ovascii      = (PetscViewer_ASCII*)(*outviewer)->data;
144a47c9f9aSHong Zhang   ovascii->fd  = vascii->fd;
145a47c9f9aSHong Zhang   ovascii->tab = vascii->tab;
146a47c9f9aSHong Zhang 
147a47c9f9aSHong Zhang   vascii->sviewer = *outviewer;
148a47c9f9aSHong Zhang 
149a47c9f9aSHong Zhang   (*outviewer)->format     = viewer->format;
150a47c9f9aSHong Zhang   (*outviewer)->iformat    = viewer->iformat;
151a47c9f9aSHong Zhang 
152a47c9f9aSHong Zhang   ierr = PetscObjectGetName((PetscObject)viewer,&name);CHKERRQ(ierr);
153a47c9f9aSHong Zhang   ierr = PetscObjectSetName((PetscObject)(*outviewer),name);CHKERRQ(ierr);
154a47c9f9aSHong Zhang 
155a47c9f9aSHong Zhang   ierr = MPI_Comm_rank(viewer->comm,&rank);CHKERRQ(ierr);
156a47c9f9aSHong Zhang   ((PetscViewer_ASCII*)((*outviewer)->data))->bviewer = viewer;
157a47c9f9aSHong Zhang   (*outviewer)->ops->destroy = PetscViewerDestroy_ASCII_Subcomm;
158a47c9f9aSHong Zhang   if (rank) {
159a47c9f9aSHong Zhang     (*outviewer)->ops->flush = 0;
160a47c9f9aSHong Zhang   } else {
161a47c9f9aSHong Zhang     (*outviewer)->ops->flush = PetscViewerFlush_ASCII_Subcomm_0;
162a47c9f9aSHong Zhang   }
163a47c9f9aSHong Zhang   PetscFunctionReturn(0);
164a47c9f9aSHong Zhang }
165a47c9f9aSHong Zhang 
166a47c9f9aSHong Zhang #undef __FUNCT__
167a47c9f9aSHong Zhang #define __FUNCT__ "PetscViewerRestoreSubcomm_ASCII"
168a47c9f9aSHong Zhang PetscErrorCode PetscViewerRestoreSubcomm_ASCII(PetscViewer viewer,MPI_Comm subcomm,PetscViewer *outviewer)
169a47c9f9aSHong Zhang {
170a47c9f9aSHong Zhang   PetscErrorCode    ierr;
171a47c9f9aSHong Zhang   PetscViewer_ASCII *vascii = (PetscViewer_ASCII *)(*outviewer)->data;
172a47c9f9aSHong Zhang   PetscViewer_ASCII *ascii  = (PetscViewer_ASCII *)viewer->data;
173a47c9f9aSHong Zhang 
174a47c9f9aSHong Zhang   PetscFunctionBegin;
175a47c9f9aSHong Zhang   if (!ascii->sviewer) {
176a47c9f9aSHong Zhang     SETERRQ(PETSC_ERR_ORDER,"Subcomm never obtained from PetscViewer");
177a47c9f9aSHong Zhang   }
178a47c9f9aSHong Zhang   if (ascii->sviewer != *outviewer) {
179a47c9f9aSHong Zhang     SETERRQ(PETSC_ERR_ARG_WRONG,"This PetscViewer did not generate singleton");
180a47c9f9aSHong Zhang   }
181a47c9f9aSHong Zhang 
182a47c9f9aSHong Zhang   ascii->sviewer             = 0;
183a47c9f9aSHong Zhang   vascii->fd                 = stdout;
184a47c9f9aSHong Zhang   (*outviewer)->ops->destroy = PetscViewerDestroy_ASCIIh;
185a47c9f9aSHong Zhang   ierr                       = PetscViewerDestroy(*outviewer);CHKERRQ(ierr);
186a47c9f9aSHong Zhang   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
187a47c9f9aSHong Zhang   PetscFunctionReturn(0);
188a47c9f9aSHong Zhang }
189a47c9f9aSHong Zhang 
1904b9ad928SBarry Smith #undef __FUNCT__
1914b9ad928SBarry Smith #define __FUNCT__ "PCView_Redundant"
1926849ba73SBarry Smith static PetscErrorCode PCView_Redundant(PC pc,PetscViewer viewer)
1934b9ad928SBarry Smith {
1944b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
195dfbe8321SBarry Smith   PetscErrorCode ierr;
19613f74950SBarry Smith   PetscMPIInt    rank;
19732077d6dSBarry Smith   PetscTruth     iascii,isstring;
198a47c9f9aSHong Zhang   PetscViewer    sviewer,subviewer;
199a47c9f9aSHong Zhang   PetscInt       color = red->color;
2004b9ad928SBarry Smith 
2014b9ad928SBarry Smith   PetscFunctionBegin;
2024b9ad928SBarry Smith   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
20332077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
2044b9ad928SBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_STRING,&isstring);CHKERRQ(ierr);
20532077d6dSBarry Smith   if (iascii) {
2064b9ad928SBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"  Redundant solver preconditioner: Actual PC follows\n");CHKERRQ(ierr);
207a47c9f9aSHong Zhang     ierr = PetscViewerGetSubcomm_ASCII(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr);
208a47c9f9aSHong Zhang     /* ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); */
209a47c9f9aSHong Zhang     if (!color) {
2104b9ad928SBarry Smith       ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
211a47c9f9aSHong Zhang       ierr = PCView(red->pc,subviewer);CHKERRQ(ierr);
2124b9ad928SBarry Smith       ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
2134b9ad928SBarry Smith     }
214a47c9f9aSHong Zhang     /* ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); */
215a47c9f9aSHong Zhang     ierr = PetscViewerRestoreSubcomm_ASCII(viewer,red->pc->comm,&subviewer);CHKERRQ(ierr);
2164b9ad928SBarry Smith   } else if (isstring) {
2174b9ad928SBarry Smith     ierr = PetscViewerStringSPrintf(viewer," Redundant solver preconditioner");CHKERRQ(ierr);
2184b9ad928SBarry Smith     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
2194b9ad928SBarry Smith     if (!rank) {
2204b9ad928SBarry Smith       ierr = PCView(red->pc,sviewer);CHKERRQ(ierr);
2214b9ad928SBarry Smith     }
2224b9ad928SBarry Smith     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
2234b9ad928SBarry Smith   } else {
22479a5c55eSBarry Smith     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PC redundant",((PetscObject)viewer)->type_name);
2254b9ad928SBarry Smith   }
2264b9ad928SBarry Smith   PetscFunctionReturn(0);
2274b9ad928SBarry Smith }
2284b9ad928SBarry Smith 
2293f457be1SHong Zhang #include "src/mat/matimpl.h"        /*I "petscmat.h" I*/
2303f457be1SHong Zhang #include "private/vecimpl.h"
2313f457be1SHong Zhang #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
2323f457be1SHong Zhang #include "src/mat/impls/aij/seq/aij.h"      /*I "petscmat.h" I*/
2333f457be1SHong Zhang 
234b3804887SHong Zhang typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
235b3804887SHong Zhang   PetscInt       nzlocal,nsends,nrecvs;
236b3804887SHong Zhang   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
237b3804887SHong Zhang   PetscScalar    *sbuf_a,**rbuf_a;
238b3804887SHong Zhang   PetscErrorCode (*MatDestroy)(Mat);
239b3804887SHong Zhang } Mat_Redundant;
240b3804887SHong Zhang 
241b3804887SHong Zhang #undef __FUNCT__
242b3804887SHong Zhang #define __FUNCT__ "PetscObjectContainerDestroy_MatRedundant"
243b3804887SHong Zhang PetscErrorCode PetscObjectContainerDestroy_MatRedundant(void *ptr)
244b3804887SHong Zhang {
245b3804887SHong Zhang   PetscErrorCode       ierr;
246b3804887SHong Zhang   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
247b3804887SHong Zhang   PetscInt             i;
248b3804887SHong Zhang 
249b3804887SHong Zhang   PetscFunctionBegin;
250b3804887SHong Zhang   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
251b3804887SHong Zhang   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
252b3804887SHong Zhang   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
253b3804887SHong Zhang   for (i=0; i<redund->nrecvs; i++){
254b3804887SHong Zhang     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
255b3804887SHong Zhang     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
256b3804887SHong Zhang   }
257b3804887SHong Zhang   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
258b3804887SHong Zhang   ierr = PetscFree(redund);CHKERRQ(ierr);
259b3804887SHong Zhang   PetscFunctionReturn(0);
260b3804887SHong Zhang }
261b3804887SHong Zhang 
262b3804887SHong Zhang #undef __FUNCT__
263b3804887SHong Zhang #define __FUNCT__ "MatDestroy_MatRedundant"
264b3804887SHong Zhang PetscErrorCode MatDestroy_MatRedundant(Mat A)
265b3804887SHong Zhang {
266b3804887SHong Zhang   PetscErrorCode       ierr;
267b3804887SHong Zhang   PetscObjectContainer container;
268b3804887SHong Zhang   Mat_Redundant        *redund=PETSC_NULL;
269b3804887SHong Zhang 
270b3804887SHong Zhang   PetscFunctionBegin;
271b3804887SHong Zhang   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
272b3804887SHong Zhang   if (container) {
273b3804887SHong Zhang     ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
274b3804887SHong Zhang   } else {
275b3804887SHong Zhang     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
276b3804887SHong Zhang   }
277b3804887SHong Zhang   A->ops->destroy = redund->MatDestroy;
278b3804887SHong Zhang   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
279b3804887SHong Zhang   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
280b3804887SHong Zhang   ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
281b3804887SHong Zhang   PetscFunctionReturn(0);
282b3804887SHong Zhang }
283b3804887SHong Zhang 
2843f457be1SHong Zhang #undef __FUNCT__
2853f457be1SHong Zhang #define __FUNCT__ "MatGetRedundantMatrix"
286f664ae05SHong Zhang PetscErrorCode MatGetRedundantMatrix_AIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2873f457be1SHong Zhang {
288b3804887SHong Zhang   PetscMPIInt    rank,size;
2893f457be1SHong Zhang   MPI_Comm       comm=mat->comm;
2903f457be1SHong Zhang   PetscErrorCode ierr;
291*3365e775SHong Zhang   PetscInt       nsends=0,nrecvs=0,i,rownz_max;
292*3365e775SHong Zhang   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
293b3804887SHong Zhang   PetscInt       *rowrange=mat->rmap.range;
2943f457be1SHong Zhang   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
295b3804887SHong Zhang   Mat            A=aij->A,B=aij->B,C=*matredundant;
2963f457be1SHong Zhang   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2973f457be1SHong Zhang   PetscScalar    *sbuf_a;
298b3804887SHong Zhang   PetscInt       nzlocal=a->nz+b->nz;
299b3804887SHong Zhang   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
3003f457be1SHong Zhang   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
301b3804887SHong Zhang   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
3023f457be1SHong Zhang   PetscScalar    *vals,*aworkA,*aworkB;
3033f457be1SHong Zhang   PetscMPIInt    tag1,tag2,tag3,imdex;
304*3365e775SHong Zhang   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
305*3365e775SHong Zhang                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
3063f457be1SHong Zhang   MPI_Status     recv_status,*send_status;
307*3365e775SHong Zhang   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
308*3365e775SHong Zhang   PetscInt       **rbuf_j=PETSC_NULL;
309*3365e775SHong Zhang   PetscScalar    **rbuf_a=PETSC_NULL;
310b3804887SHong Zhang   Mat_Redundant  *redund=PETSC_NULL;
311b3804887SHong Zhang   PetscObjectContainer container;
3123f457be1SHong Zhang 
3133f457be1SHong Zhang   PetscFunctionBegin;
3143f457be1SHong Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3153f457be1SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
316b3804887SHong Zhang 
317b3804887SHong Zhang   if (reuse == MAT_REUSE_MATRIX) {
318b3804887SHong Zhang     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
319b3804887SHong Zhang     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
320b3804887SHong Zhang     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
321b3804887SHong Zhang     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
322b3804887SHong Zhang     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
323b3804887SHong Zhang     if (container) {
324b3804887SHong Zhang       ierr = PetscObjectContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
325b3804887SHong Zhang     } else {
326b3804887SHong Zhang       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
327b3804887SHong Zhang     }
328b3804887SHong Zhang     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
329b3804887SHong Zhang 
330b3804887SHong Zhang     nsends    = redund->nsends;
331b3804887SHong Zhang     nrecvs    = redund->nrecvs;
332b3804887SHong Zhang     send_rank = redund->send_rank; recv_rank = send_rank + size;
333b3804887SHong Zhang     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
334b3804887SHong Zhang     sbuf_j    = redund->sbuf_j;
335b3804887SHong Zhang     sbuf_a    = redund->sbuf_a;
336b3804887SHong Zhang     rbuf_j    = redund->rbuf_j;
337b3804887SHong Zhang     rbuf_a    = redund->rbuf_a;
338b3804887SHong Zhang   }
339b3804887SHong Zhang 
340b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
341b3804887SHong Zhang     PetscMPIInt  subrank,subsize;
342b3804887SHong Zhang     PetscInt     nleftover,np_subcomm;
343b3804887SHong Zhang     /* get the destination processors' id send_rank, nsends and nrecvs */
3443f457be1SHong Zhang     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
3453f457be1SHong Zhang     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
3463f457be1SHong Zhang     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
3473f457be1SHong Zhang     recv_rank = send_rank + size;
3483f457be1SHong Zhang     np_subcomm = size/nsubcomm;
3493f457be1SHong Zhang     nleftover  = size - nsubcomm*np_subcomm;
3503f457be1SHong Zhang     nsends = 0; nrecvs = 0;
3513f457be1SHong Zhang     for (i=0; i<size; i++){ /* i=rank*/
3523f457be1SHong Zhang       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
3533f457be1SHong Zhang         send_rank[nsends] = i; nsends++;
3543f457be1SHong Zhang         recv_rank[nrecvs++] = i;
3553f457be1SHong Zhang       }
3563f457be1SHong Zhang     }
3573f457be1SHong Zhang     if (rank >= size - nleftover){/* this proc is a leftover processor */
3583f457be1SHong Zhang       i = size-nleftover-1;
3593f457be1SHong Zhang       j = 0;
3603f457be1SHong Zhang       while (j < nsubcomm - nleftover){
3613f457be1SHong Zhang         send_rank[nsends++] = i;
3623f457be1SHong Zhang         i--; j++;
3633f457be1SHong Zhang       }
3643f457be1SHong Zhang     }
3653f457be1SHong Zhang 
3663f457be1SHong Zhang     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
3673f457be1SHong Zhang       for (i=0; i<nleftover; i++){
3683f457be1SHong Zhang         recv_rank[nrecvs++] = size-nleftover+i;
3693f457be1SHong Zhang       }
3703f457be1SHong Zhang     }
3713f457be1SHong Zhang 
372b3804887SHong Zhang     /* allocate sbuf_j, sbuf_a */
373b3804887SHong Zhang     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
374b3804887SHong Zhang     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
3753f457be1SHong Zhang     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
376b3804887SHong Zhang   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
3773f457be1SHong Zhang 
378b3804887SHong Zhang   /* copy mat's local entries into the buffers */
379b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
380b3804887SHong Zhang     rownz_max = 0;
3813f457be1SHong Zhang     rptr = sbuf_j;
3823f457be1SHong Zhang     cols = sbuf_j + rend-rstart + 1;
3833f457be1SHong Zhang     vals = sbuf_a;
3843f457be1SHong Zhang     rptr[0] = 0;
3853f457be1SHong Zhang     for (i=0; i<rend-rstart; i++){
3863f457be1SHong Zhang       row = i + rstart;
3873f457be1SHong Zhang       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
3883f457be1SHong Zhang       ncols  = nzA + nzB;
3893f457be1SHong Zhang       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
3903f457be1SHong Zhang       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
3913f457be1SHong Zhang       /* load the column indices for this row into cols */
3923f457be1SHong Zhang       lwrite = 0;
3933f457be1SHong Zhang       for (l=0; l<nzB; l++) {
3943f457be1SHong Zhang         if ((ctmp = bmap[cworkB[l]]) < cstart){
3953f457be1SHong Zhang           vals[lwrite]   = aworkB[l];
3963f457be1SHong Zhang           cols[lwrite++] = ctmp;
3973f457be1SHong Zhang         }
3983f457be1SHong Zhang       }
3993f457be1SHong Zhang       for (l=0; l<nzA; l++){
4003f457be1SHong Zhang         vals[lwrite]   = aworkA[l];
4013f457be1SHong Zhang         cols[lwrite++] = cstart + cworkA[l];
4023f457be1SHong Zhang       }
4033f457be1SHong Zhang       for (l=0; l<nzB; l++) {
4043f457be1SHong Zhang         if ((ctmp = bmap[cworkB[l]]) >= cend){
4053f457be1SHong Zhang           vals[lwrite]   = aworkB[l];
4063f457be1SHong Zhang           cols[lwrite++] = ctmp;
4073f457be1SHong Zhang         }
4083f457be1SHong Zhang       }
4093f457be1SHong Zhang       vals += ncols;
4103f457be1SHong Zhang       cols += ncols;
4113f457be1SHong Zhang       rptr[i+1] = rptr[i] + ncols;
412b3804887SHong Zhang       if (rownz_max < ncols) rownz_max = ncols;
4133f457be1SHong Zhang     }
4143f457be1SHong Zhang     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
415b3804887SHong Zhang   } else { /* only copy matrix values into sbuf_a */
416b3804887SHong Zhang     rptr = sbuf_j;
417b3804887SHong Zhang     vals = sbuf_a;
418b3804887SHong Zhang     rptr[0] = 0;
419b3804887SHong Zhang     for (i=0; i<rend-rstart; i++){
420b3804887SHong Zhang       row = i + rstart;
421b3804887SHong Zhang       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
422b3804887SHong Zhang       ncols  = nzA + nzB;
423b3804887SHong Zhang       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
424b3804887SHong Zhang       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
425b3804887SHong Zhang       lwrite = 0;
426b3804887SHong Zhang       for (l=0; l<nzB; l++) {
427b3804887SHong Zhang         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
428b3804887SHong Zhang       }
429b3804887SHong Zhang       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
430b3804887SHong Zhang       for (l=0; l<nzB; l++) {
431b3804887SHong Zhang         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
432b3804887SHong Zhang       }
433b3804887SHong Zhang       vals += ncols;
434b3804887SHong Zhang       rptr[i+1] = rptr[i] + ncols;
435b3804887SHong Zhang     }
436b3804887SHong Zhang   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
4373f457be1SHong Zhang 
4383f457be1SHong Zhang   /* send nzlocal to others, and recv other's nzlocal */
4393f457be1SHong Zhang   /*--------------------------------------------------*/
440b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
441b3804887SHong Zhang     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
442b3804887SHong Zhang     s_waits2 = s_waits3 + nsends;
443b3804887SHong Zhang     s_waits1 = s_waits2 + nsends;
444b3804887SHong Zhang     r_waits1 = s_waits1 + nsends;
4453f457be1SHong Zhang     r_waits2 = r_waits1 + nrecvs;
4463f457be1SHong Zhang     r_waits3 = r_waits2 + nrecvs;
447b3804887SHong Zhang   } else {
448b3804887SHong Zhang     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
449b3804887SHong Zhang     r_waits3 = s_waits3 + nsends;
450b3804887SHong Zhang   }
4513f457be1SHong Zhang 
452b3804887SHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
453b3804887SHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
454b3804887SHong Zhang     /* get new tags to keep the communication clean */
455b3804887SHong Zhang     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
456b3804887SHong Zhang     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
457b3804887SHong Zhang     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
458b3804887SHong Zhang     rbuf_nz = sbuf_nz + nsends;
4593f457be1SHong Zhang 
4603f457be1SHong Zhang     /* post receives of other's nzlocal */
4613f457be1SHong Zhang     for (i=0; i<nrecvs; i++){
4623f457be1SHong Zhang       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
4633f457be1SHong Zhang     }
4643f457be1SHong Zhang     /* send nzlocal to others */
4653f457be1SHong Zhang     for (i=0; i<nsends; i++){
4663f457be1SHong Zhang       sbuf_nz[i] = nzlocal;
4673f457be1SHong Zhang       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
4683f457be1SHong Zhang     }
4693f457be1SHong Zhang     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
4703f457be1SHong Zhang     count = nrecvs;
4713f457be1SHong Zhang     while (count) {
4723f457be1SHong Zhang       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
4733f457be1SHong Zhang       recv_rank[imdex] = recv_status.MPI_SOURCE;
474b3804887SHong Zhang       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
4753f457be1SHong Zhang       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
4763f457be1SHong Zhang 
477b3804887SHong Zhang       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
478b3804887SHong Zhang       rbuf_nz[imdex] += i + 2;
4793f457be1SHong Zhang       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
4803f457be1SHong Zhang       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
4813f457be1SHong Zhang       count--;
4823f457be1SHong Zhang     }
4833f457be1SHong Zhang     /* wait on sends of nzlocal */
4843f457be1SHong Zhang     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
485b3804887SHong Zhang     /* send mat->i,j to others, and recv from other's */
486b3804887SHong Zhang     /*------------------------------------------------*/
487b3804887SHong Zhang     for (i=0; i<nsends; i++){
488b3804887SHong Zhang       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
489b3804887SHong Zhang       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
490b3804887SHong Zhang     }
491b3804887SHong Zhang     /* wait on receives of mat->i,j */
492b3804887SHong Zhang     /*------------------------------*/
493b3804887SHong Zhang     count = nrecvs;
494b3804887SHong Zhang     while (count) {
495b3804887SHong Zhang       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
496b3804887SHong Zhang       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
497b3804887SHong Zhang       count--;
498b3804887SHong Zhang     }
499b3804887SHong Zhang     /* wait on sends of mat->i,j */
500b3804887SHong Zhang     /*---------------------------*/
501b3804887SHong Zhang     if (nsends) {
502b3804887SHong Zhang       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
503b3804887SHong Zhang     }
504b3804887SHong Zhang   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
5053f457be1SHong Zhang 
506b3804887SHong Zhang   /* post receives, send and receive mat->a */
507b3804887SHong Zhang   /*----------------------------------------*/
508b3804887SHong Zhang   for (imdex=0; imdex<nrecvs; imdex++) {
509b3804887SHong Zhang     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
510b3804887SHong Zhang   }
5113f457be1SHong Zhang   for (i=0; i<nsends; i++){
5123f457be1SHong Zhang     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
5133f457be1SHong Zhang   }
5143f457be1SHong Zhang   count = nrecvs;
5153f457be1SHong Zhang   while (count) {
5163f457be1SHong Zhang     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
5173f457be1SHong Zhang     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
5183f457be1SHong Zhang     count--;
5193f457be1SHong Zhang   }
5203f457be1SHong Zhang   if (nsends) {
5213f457be1SHong Zhang     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
5223f457be1SHong Zhang   }
523b3804887SHong Zhang 
524b3804887SHong Zhang   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
5253f457be1SHong Zhang 
5263f457be1SHong Zhang   /* create redundant matrix */
5273f457be1SHong Zhang   /*-------------------------*/
5280ae51fcdSHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
529b3804887SHong Zhang     /* compute rownz_max for preallocation */
530b3804887SHong Zhang     for (imdex=0; imdex<nrecvs; imdex++){
531b3804887SHong Zhang       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
532b3804887SHong Zhang       rptr = rbuf_j[imdex];
533b3804887SHong Zhang       for (i=0; i<j; i++){
534b3804887SHong Zhang         ncols = rptr[i+1] - rptr[i];
535b3804887SHong Zhang         if (rownz_max < ncols) rownz_max = ncols;
536b3804887SHong Zhang       }
537b3804887SHong Zhang     }
538b3804887SHong Zhang 
5393f457be1SHong Zhang     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
5403f457be1SHong Zhang     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
5413f457be1SHong Zhang     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
542b3804887SHong Zhang     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
543b3804887SHong Zhang     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
5440ae51fcdSHong Zhang   } else {
5450ae51fcdSHong Zhang     C = *matredundant;
5460ae51fcdSHong Zhang   }
547b3804887SHong Zhang 
5483f457be1SHong Zhang   /* insert local matrix entries */
5493f457be1SHong Zhang   rptr = sbuf_j;
5503f457be1SHong Zhang   cols = sbuf_j + rend-rstart + 1;
5513f457be1SHong Zhang   vals = sbuf_a;
5523f457be1SHong Zhang   for (i=0; i<rend-rstart; i++){
5533f457be1SHong Zhang     row   = i + rstart;
5543f457be1SHong Zhang     ncols = rptr[i+1] - rptr[i];
5553f457be1SHong Zhang     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
5563f457be1SHong Zhang     vals += ncols;
5573f457be1SHong Zhang     cols += ncols;
5583f457be1SHong Zhang   }
5593f457be1SHong Zhang   /* insert received matrix entries */
5603f457be1SHong Zhang   for (imdex=0; imdex<nrecvs; imdex++){
5613f457be1SHong Zhang     rstart = rowrange[recv_rank[imdex]];
5623f457be1SHong Zhang     rend   = rowrange[recv_rank[imdex]+1];
5633f457be1SHong Zhang     rptr = rbuf_j[imdex];
5643f457be1SHong Zhang     cols = rbuf_j[imdex] + rend-rstart + 1;
5653f457be1SHong Zhang     vals = rbuf_a[imdex];
5663f457be1SHong Zhang     for (i=0; i<rend-rstart; i++){
5673f457be1SHong Zhang       row   = i + rstart;
5683f457be1SHong Zhang       ncols = rptr[i+1] - rptr[i];
5693f457be1SHong Zhang       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
5703f457be1SHong Zhang       vals += ncols;
5713f457be1SHong Zhang       cols += ncols;
5723f457be1SHong Zhang     }
5733f457be1SHong Zhang   }
5743f457be1SHong Zhang   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5753f457be1SHong Zhang   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5763f457be1SHong Zhang   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
5773f457be1SHong Zhang   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
5780ae51fcdSHong Zhang   if (reuse == MAT_INITIAL_MATRIX){
579b3804887SHong Zhang     PetscObjectContainer container;
5803f457be1SHong Zhang     *matredundant = C;
581b3804887SHong Zhang     /* create a supporting struct and attach it to C for reuse */
582b3804887SHong Zhang     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
583b3804887SHong Zhang     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
584b3804887SHong Zhang     ierr = PetscObjectContainerSetPointer(container,redund);CHKERRQ(ierr);
585b3804887SHong Zhang     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
586b3804887SHong Zhang     ierr = PetscObjectContainerSetUserDestroy(container,PetscObjectContainerDestroy_MatRedundant);CHKERRQ(ierr);
5873f457be1SHong Zhang 
588b3804887SHong Zhang     redund->nzlocal = nzlocal;
589b3804887SHong Zhang     redund->nsends  = nsends;
590b3804887SHong Zhang     redund->nrecvs  = nrecvs;
591b3804887SHong Zhang     redund->send_rank = send_rank;
592b3804887SHong Zhang     redund->sbuf_nz = sbuf_nz;
593b3804887SHong Zhang     redund->sbuf_j  = sbuf_j;
594b3804887SHong Zhang     redund->sbuf_a  = sbuf_a;
595b3804887SHong Zhang     redund->rbuf_j  = rbuf_j;
596b3804887SHong Zhang     redund->rbuf_a  = rbuf_a;
597b3804887SHong Zhang 
598b3804887SHong Zhang     redund->MatDestroy = C->ops->destroy;
599b3804887SHong Zhang     C->ops->destroy    = MatDestroy_MatRedundant;
6003f457be1SHong Zhang   }
6013f457be1SHong Zhang   PetscFunctionReturn(0);
6023f457be1SHong Zhang }
6033f457be1SHong Zhang 
6044b9ad928SBarry Smith #undef __FUNCT__
6054b9ad928SBarry Smith #define __FUNCT__ "PCSetUp_Redundant"
6066849ba73SBarry Smith static PetscErrorCode PCSetUp_Redundant(PC pc)
6074b9ad928SBarry Smith {
6084b9ad928SBarry Smith   PC_Redundant   *red  = (PC_Redundant*)pc->data;
609dfbe8321SBarry Smith   PetscErrorCode ierr;
6103f457be1SHong Zhang   PetscInt       mstart,mend,mlocal,m;
61113f74950SBarry Smith   PetscMPIInt    size;
6124b9ad928SBarry Smith   MatReuse       reuse = MAT_INITIAL_MATRIX;
6134b9ad928SBarry Smith   MatStructure   str   = DIFFERENT_NONZERO_PATTERN;
6144b9ad928SBarry Smith   MPI_Comm       comm;
61523ce1328SBarry Smith   Vec            vec;
6164b9ad928SBarry Smith 
6173f457be1SHong Zhang   PetscInt       mlocal_sub;
6183f457be1SHong Zhang   PetscMPIInt    subsize,subrank;
6193f457be1SHong Zhang   PetscInt       rstart_sub,rend_sub,mloc_sub;
6203f457be1SHong Zhang 
6214b9ad928SBarry Smith   PetscFunctionBegin;
6223f457be1SHong Zhang   ierr = PetscObjectGetComm((PetscObject)pc->pmat,&comm);CHKERRQ(ierr);
62323ce1328SBarry Smith   ierr = MatGetVecs(pc->pmat,&vec,0);CHKERRQ(ierr);
6244b9ad928SBarry Smith   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
62523ce1328SBarry Smith   ierr = VecGetSize(vec,&m);CHKERRQ(ierr);
6264b9ad928SBarry Smith   if (!pc->setupcalled) {
6273f457be1SHong Zhang     /* create working vectors xsub/ysub and xdup/ydup */
62823ce1328SBarry Smith     ierr = VecGetLocalSize(vec,&mlocal);CHKERRQ(ierr);
6293f457be1SHong Zhang     ierr = VecGetOwnershipRange(vec,&mstart,&mend);CHKERRQ(ierr);
6304b9ad928SBarry Smith 
6313f457be1SHong Zhang     /* get local size of xsub/ysub */
6323f457be1SHong Zhang     ierr = MPI_Comm_size(red->subcomm,&subsize);CHKERRQ(ierr);
6333f457be1SHong Zhang     ierr = MPI_Comm_rank(red->subcomm,&subrank);CHKERRQ(ierr);
6343f457be1SHong Zhang     rstart_sub = pc->pmat->rmap.range[red->nsubcomm*subrank]; /* rstart in xsub/ysub */
6353f457be1SHong Zhang     if (subrank+1 < subsize){
6363f457be1SHong Zhang       rend_sub = pc->pmat->rmap.range[red->nsubcomm*(subrank+1)];
6373f457be1SHong Zhang     } else {
6383f457be1SHong Zhang       rend_sub = m;
6393f457be1SHong Zhang     }
6403f457be1SHong Zhang     mloc_sub = rend_sub - rstart_sub;
6413f457be1SHong Zhang     ierr = VecCreateMPI(red->subcomm,mloc_sub,PETSC_DECIDE,&red->ysub);CHKERRQ(ierr);
6423f457be1SHong Zhang     /* create xsub with empty local arrays, because xdup's arrays will be placed into it */
6433f457be1SHong Zhang     ierr = VecCreateMPIWithArray(red->subcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->xsub);CHKERRQ(ierr);
6443f457be1SHong Zhang 
6453f457be1SHong Zhang     /* create xdup and ydup. ydup has empty local arrays because ysub's arrays will be place into it.
6463f457be1SHong Zhang        Note: we use communicator dupcomm, not pc->comm! */
6473f457be1SHong Zhang     ierr = VecCreateMPI(red->dupcomm,mloc_sub,PETSC_DECIDE,&red->xdup);CHKERRQ(ierr);
6483f457be1SHong Zhang     ierr = VecCreateMPIWithArray(red->dupcomm,mloc_sub,PETSC_DECIDE,PETSC_NULL,&red->ydup);CHKERRQ(ierr);
6493f457be1SHong Zhang 
6503f457be1SHong Zhang     /* create vec scatters */
6513f457be1SHong Zhang     if (!red->scatterin){
6523f457be1SHong Zhang       IS       is1,is2;
6533f457be1SHong Zhang       PetscInt *idx1,*idx2,i,j,k;
6543f457be1SHong Zhang       ierr = PetscMalloc(2*red->nsubcomm*mlocal*sizeof(PetscInt),&idx1);CHKERRQ(ierr);
6553f457be1SHong Zhang       idx2 = idx1 + red->nsubcomm*mlocal;
6563f457be1SHong Zhang       j = 0;
6573f457be1SHong Zhang       for (k=0; k<red->nsubcomm; k++){
6583f457be1SHong Zhang         for (i=mstart; i<mend; i++){
6593f457be1SHong Zhang           idx1[j]   = i;
6603f457be1SHong Zhang           idx2[j++] = i + m*k;
6613f457be1SHong Zhang         }
6623f457be1SHong Zhang       }
6633f457be1SHong Zhang       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx1,&is1);CHKERRQ(ierr);
6643f457be1SHong Zhang       ierr = ISCreateGeneral(comm,red->nsubcomm*mlocal,idx2,&is2);CHKERRQ(ierr);
6653f457be1SHong Zhang       ierr = VecScatterCreate(vec,is1,red->xdup,is2,&red->scatterin);CHKERRQ(ierr);
6663f457be1SHong Zhang       ierr = ISDestroy(is1);CHKERRQ(ierr);
6673f457be1SHong Zhang       ierr = ISDestroy(is2);CHKERRQ(ierr);
6683f457be1SHong Zhang 
6693f457be1SHong Zhang       ierr = ISCreateStride(comm,mlocal,mstart+ red->color*m,1,&is1);CHKERRQ(ierr);
6703f457be1SHong Zhang       ierr = ISCreateStride(comm,mlocal,mstart,1,&is2);CHKERRQ(ierr);
6713f457be1SHong Zhang       ierr = VecScatterCreate(red->xdup,is1,vec,is2,&red->scatterout);CHKERRQ(ierr);
6723f457be1SHong Zhang       ierr = ISDestroy(is1);CHKERRQ(ierr);
6733f457be1SHong Zhang       ierr = ISDestroy(is2);CHKERRQ(ierr);
6743f457be1SHong Zhang       ierr = PetscFree(idx1);CHKERRQ(ierr);
6754b9ad928SBarry Smith     }
6764b9ad928SBarry Smith   }
67723ce1328SBarry Smith   ierr = VecDestroy(vec);CHKERRQ(ierr);
6784b9ad928SBarry Smith 
6794b9ad928SBarry Smith   /* if pmatrix set by user is sequential then we do not need to gather the parallel matrix */
6803f457be1SHong Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
6814b9ad928SBarry Smith   if (size == 1) {
6824b9ad928SBarry Smith     red->useparallelmat = PETSC_FALSE;
6834b9ad928SBarry Smith   }
6844b9ad928SBarry Smith 
6854b9ad928SBarry Smith   if (red->useparallelmat) {
6864b9ad928SBarry Smith     if (pc->setupcalled == 1 && pc->flag == DIFFERENT_NONZERO_PATTERN) {
6874b9ad928SBarry Smith       /* destroy old matrices */
6884b9ad928SBarry Smith       if (red->pmats) {
689b3804887SHong Zhang         ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
6904b9ad928SBarry Smith       }
6914b9ad928SBarry Smith     } else if (pc->setupcalled == 1) {
6924b9ad928SBarry Smith       reuse = MAT_REUSE_MATRIX;
6934b9ad928SBarry Smith       str   = SAME_NONZERO_PATTERN;
6944b9ad928SBarry Smith     }
6954b9ad928SBarry Smith 
6963f457be1SHong Zhang     /* grab the parallel matrix and put it into processors of a subcomminicator */
697f664ae05SHong Zhang     /*--------------------------------------------------------------------------*/
698f664ae05SHong Zhang     ierr = VecGetLocalSize(red->ysub,&mlocal_sub);CHKERRQ(ierr);
699b3804887SHong Zhang     ierr = MatGetRedundantMatrix_AIJ(pc->pmat,red->nsubcomm,red->subcomm,mlocal_sub,reuse,&red->pmats);CHKERRQ(ierr);
7003f457be1SHong Zhang     /* tell PC of the subcommunicator its operators */
701b3804887SHong Zhang     ierr = PCSetOperators(red->pc,red->pmats,red->pmats,str);CHKERRQ(ierr);
7024b9ad928SBarry Smith   } else {
7034b9ad928SBarry Smith     ierr = PCSetOperators(red->pc,pc->mat,pc->pmat,pc->flag);CHKERRQ(ierr);
7044b9ad928SBarry Smith   }
7054b9ad928SBarry Smith   ierr = PCSetFromOptions(red->pc);CHKERRQ(ierr);
7064b9ad928SBarry Smith   ierr = PCSetUp(red->pc);CHKERRQ(ierr);
7074b9ad928SBarry Smith   PetscFunctionReturn(0);
7084b9ad928SBarry Smith }
7094b9ad928SBarry Smith 
7104b9ad928SBarry Smith #undef __FUNCT__
7114b9ad928SBarry Smith #define __FUNCT__ "PCApply_Redundant"
7126849ba73SBarry Smith static PetscErrorCode PCApply_Redundant(PC pc,Vec x,Vec y)
7134b9ad928SBarry Smith {
7144b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
715dfbe8321SBarry Smith   PetscErrorCode ierr;
7163f457be1SHong Zhang   PetscScalar    *array;
7174b9ad928SBarry Smith 
7184b9ad928SBarry Smith   PetscFunctionBegin;
7193f457be1SHong Zhang   /* scatter x to xdup */
7203f457be1SHong Zhang   ierr = VecScatterBegin(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
7213f457be1SHong Zhang   ierr = VecScatterEnd(x,red->xdup,INSERT_VALUES,SCATTER_FORWARD,red->scatterin);CHKERRQ(ierr);
7223f457be1SHong Zhang 
7233f457be1SHong Zhang   /* place xdup's local array into xsub */
7243f457be1SHong Zhang   ierr = VecGetArray(red->xdup,&array);CHKERRQ(ierr);
7253f457be1SHong Zhang   ierr = VecPlaceArray(red->xsub,(const PetscScalar*)array);CHKERRQ(ierr);
7264b9ad928SBarry Smith 
7274b9ad928SBarry Smith   /* apply preconditioner on each processor */
7283f457be1SHong Zhang   ierr = PCApply(red->pc,red->xsub,red->ysub);CHKERRQ(ierr);
7293f457be1SHong Zhang   ierr = VecResetArray(red->xsub);CHKERRQ(ierr);
7303f457be1SHong Zhang   ierr = VecRestoreArray(red->xdup,&array);CHKERRQ(ierr);
7314b9ad928SBarry Smith 
7323f457be1SHong Zhang   /* place ysub's local array into ydup */
7333f457be1SHong Zhang   ierr = VecGetArray(red->ysub,&array);CHKERRQ(ierr);
7343f457be1SHong Zhang   ierr = VecPlaceArray(red->ydup,(const PetscScalar*)array);CHKERRQ(ierr);
7353f457be1SHong Zhang 
7363f457be1SHong Zhang   /* scatter ydup to y */
7373f457be1SHong Zhang   ierr = VecScatterBegin(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
7383f457be1SHong Zhang   ierr = VecScatterEnd(red->ydup,y,INSERT_VALUES,SCATTER_FORWARD,red->scatterout);CHKERRQ(ierr);
7393f457be1SHong Zhang   ierr = VecResetArray(red->ydup);CHKERRQ(ierr);
7403f457be1SHong Zhang   ierr = VecRestoreArray(red->ysub,&array);CHKERRQ(ierr);
7414b9ad928SBarry Smith   PetscFunctionReturn(0);
7424b9ad928SBarry Smith }
7434b9ad928SBarry Smith 
7444b9ad928SBarry Smith #undef __FUNCT__
7454b9ad928SBarry Smith #define __FUNCT__ "PCDestroy_Redundant"
7466849ba73SBarry Smith static PetscErrorCode PCDestroy_Redundant(PC pc)
7474b9ad928SBarry Smith {
7484b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
749dfbe8321SBarry Smith   PetscErrorCode ierr;
7504b9ad928SBarry Smith 
7514b9ad928SBarry Smith   PetscFunctionBegin;
7524b9ad928SBarry Smith   if (red->scatterin)  {ierr = VecScatterDestroy(red->scatterin);CHKERRQ(ierr);}
7534b9ad928SBarry Smith   if (red->scatterout) {ierr = VecScatterDestroy(red->scatterout);CHKERRQ(ierr);}
7543f457be1SHong Zhang   if (red->ysub)       {ierr = VecDestroy(red->ysub);CHKERRQ(ierr);}
7553f457be1SHong Zhang   if (red->xsub)       {ierr = VecDestroy(red->xsub);CHKERRQ(ierr);}
7563f457be1SHong Zhang   if (red->xdup)       {ierr = VecDestroy(red->xdup);CHKERRQ(ierr);}
7573f457be1SHong Zhang   if (red->ydup)       {ierr = VecDestroy(red->ydup);CHKERRQ(ierr);}
758b3804887SHong Zhang   if (red->pmats) {
759b3804887SHong Zhang     ierr = MatDestroy(red->pmats);CHKERRQ(ierr);
7603f457be1SHong Zhang   }
7613f457be1SHong Zhang 
762b3804887SHong Zhang 
7634b9ad928SBarry Smith   ierr = PCDestroy(red->pc);CHKERRQ(ierr);
7644b9ad928SBarry Smith   ierr = PetscFree(red);CHKERRQ(ierr);
7654b9ad928SBarry Smith   PetscFunctionReturn(0);
7664b9ad928SBarry Smith }
7674b9ad928SBarry Smith 
7684b9ad928SBarry Smith #undef __FUNCT__
7694b9ad928SBarry Smith #define __FUNCT__ "PCSetFromOptions_Redundant"
7706849ba73SBarry Smith static PetscErrorCode PCSetFromOptions_Redundant(PC pc)
7714b9ad928SBarry Smith {
7724b9ad928SBarry Smith   PetscFunctionBegin;
7734b9ad928SBarry Smith   PetscFunctionReturn(0);
7744b9ad928SBarry Smith }
7754b9ad928SBarry Smith 
7764b9ad928SBarry Smith EXTERN_C_BEGIN
7774b9ad928SBarry Smith #undef __FUNCT__
7784b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter_Redundant"
779dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter_Redundant(PC pc,VecScatter in,VecScatter out)
7804b9ad928SBarry Smith {
7814b9ad928SBarry Smith   PC_Redundant   *red = (PC_Redundant*)pc->data;
782dfbe8321SBarry Smith   PetscErrorCode ierr;
7834b9ad928SBarry Smith 
7844b9ad928SBarry Smith   PetscFunctionBegin;
7854b9ad928SBarry Smith   red->scatterin  = in;
7864b9ad928SBarry Smith   red->scatterout = out;
7874b9ad928SBarry Smith   ierr = PetscObjectReference((PetscObject)in);CHKERRQ(ierr);
7884b9ad928SBarry Smith   ierr = PetscObjectReference((PetscObject)out);CHKERRQ(ierr);
7894b9ad928SBarry Smith   PetscFunctionReturn(0);
7904b9ad928SBarry Smith }
7914b9ad928SBarry Smith EXTERN_C_END
7924b9ad928SBarry Smith 
7934b9ad928SBarry Smith #undef __FUNCT__
7944b9ad928SBarry Smith #define __FUNCT__ "PCRedundantSetScatter"
7954b9ad928SBarry Smith /*@
7964b9ad928SBarry Smith    PCRedundantSetScatter - Sets the scatter used to copy values into the
7974b9ad928SBarry Smith      redundant local solve and the scatter to move them back into the global
7984b9ad928SBarry Smith      vector.
7994b9ad928SBarry Smith 
8004b9ad928SBarry Smith    Collective on PC
8014b9ad928SBarry Smith 
8024b9ad928SBarry Smith    Input Parameters:
8034b9ad928SBarry Smith +  pc - the preconditioner context
8044b9ad928SBarry Smith .  in - the scatter to move the values in
8054b9ad928SBarry Smith -  out - the scatter to move them out
8064b9ad928SBarry Smith 
8074b9ad928SBarry Smith    Level: advanced
8084b9ad928SBarry Smith 
8094b9ad928SBarry Smith .keywords: PC, redundant solve
8104b9ad928SBarry Smith @*/
811dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantSetScatter(PC pc,VecScatter in,VecScatter out)
8124b9ad928SBarry Smith {
813dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,VecScatter,VecScatter);
8144b9ad928SBarry Smith 
8154b9ad928SBarry Smith   PetscFunctionBegin;
8164482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
8174482741eSBarry Smith   PetscValidHeaderSpecific(in,VEC_SCATTER_COOKIE,2);
8184482741eSBarry Smith   PetscValidHeaderSpecific(out,VEC_SCATTER_COOKIE,3);
8194b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantSetScatter_C",(void (**)(void))&f);CHKERRQ(ierr);
8204b9ad928SBarry Smith   if (f) {
8214b9ad928SBarry Smith     ierr = (*f)(pc,in,out);CHKERRQ(ierr);
8224b9ad928SBarry Smith   }
8234b9ad928SBarry Smith   PetscFunctionReturn(0);
8244b9ad928SBarry Smith }
8254b9ad928SBarry Smith 
8264b9ad928SBarry Smith EXTERN_C_BEGIN
8274b9ad928SBarry Smith #undef __FUNCT__
8284b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC_Redundant"
829dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC_Redundant(PC pc,PC *innerpc)
8304b9ad928SBarry Smith {
8314b9ad928SBarry Smith   PC_Redundant *red = (PC_Redundant*)pc->data;
8324b9ad928SBarry Smith 
8334b9ad928SBarry Smith   PetscFunctionBegin;
8344b9ad928SBarry Smith   *innerpc = red->pc;
8354b9ad928SBarry Smith   PetscFunctionReturn(0);
8364b9ad928SBarry Smith }
8374b9ad928SBarry Smith EXTERN_C_END
8384b9ad928SBarry Smith 
8394b9ad928SBarry Smith #undef __FUNCT__
8404b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetPC"
8414b9ad928SBarry Smith /*@
8424b9ad928SBarry Smith    PCRedundantGetPC - Gets the sequential PC created by the redundant PC.
8434b9ad928SBarry Smith 
8444b9ad928SBarry Smith    Not Collective
8454b9ad928SBarry Smith 
8464b9ad928SBarry Smith    Input Parameter:
8474b9ad928SBarry Smith .  pc - the preconditioner context
8484b9ad928SBarry Smith 
8494b9ad928SBarry Smith    Output Parameter:
8504b9ad928SBarry Smith .  innerpc - the sequential PC
8514b9ad928SBarry Smith 
8524b9ad928SBarry Smith    Level: advanced
8534b9ad928SBarry Smith 
8544b9ad928SBarry Smith .keywords: PC, redundant solve
8554b9ad928SBarry Smith @*/
856dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetPC(PC pc,PC *innerpc)
8574b9ad928SBarry Smith {
858dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,PC*);
8594b9ad928SBarry Smith 
8604b9ad928SBarry Smith   PetscFunctionBegin;
8614482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
8624482741eSBarry Smith   PetscValidPointer(innerpc,2);
8634b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetPC_C",(void (**)(void))&f);CHKERRQ(ierr);
8644b9ad928SBarry Smith   if (f) {
8654b9ad928SBarry Smith     ierr = (*f)(pc,innerpc);CHKERRQ(ierr);
8664b9ad928SBarry Smith   }
8674b9ad928SBarry Smith   PetscFunctionReturn(0);
8684b9ad928SBarry Smith }
8694b9ad928SBarry Smith 
8704b9ad928SBarry Smith EXTERN_C_BEGIN
8714b9ad928SBarry Smith #undef __FUNCT__
8724b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators_Redundant"
873dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators_Redundant(PC pc,Mat *mat,Mat *pmat)
8744b9ad928SBarry Smith {
8754b9ad928SBarry Smith   PC_Redundant *red = (PC_Redundant*)pc->data;
8764b9ad928SBarry Smith 
8774b9ad928SBarry Smith   PetscFunctionBegin;
878b3804887SHong Zhang   if (mat)  *mat  = red->pmats;
879b3804887SHong Zhang   if (pmat) *pmat = red->pmats;
8804b9ad928SBarry Smith   PetscFunctionReturn(0);
8814b9ad928SBarry Smith }
8824b9ad928SBarry Smith EXTERN_C_END
8834b9ad928SBarry Smith 
8844b9ad928SBarry Smith #undef __FUNCT__
8854b9ad928SBarry Smith #define __FUNCT__ "PCRedundantGetOperators"
8864b9ad928SBarry Smith /*@
8874b9ad928SBarry Smith    PCRedundantGetOperators - gets the sequential matrix and preconditioner matrix
8884b9ad928SBarry Smith 
8894b9ad928SBarry Smith    Not Collective
8904b9ad928SBarry Smith 
8914b9ad928SBarry Smith    Input Parameter:
8924b9ad928SBarry Smith .  pc - the preconditioner context
8934b9ad928SBarry Smith 
8944b9ad928SBarry Smith    Output Parameters:
8954b9ad928SBarry Smith +  mat - the matrix
8964b9ad928SBarry Smith -  pmat - the (possibly different) preconditioner matrix
8974b9ad928SBarry Smith 
8984b9ad928SBarry Smith    Level: advanced
8994b9ad928SBarry Smith 
9004b9ad928SBarry Smith .keywords: PC, redundant solve
9014b9ad928SBarry Smith @*/
902dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCRedundantGetOperators(PC pc,Mat *mat,Mat *pmat)
9034b9ad928SBarry Smith {
904dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(PC,Mat*,Mat*);
9054b9ad928SBarry Smith 
9064b9ad928SBarry Smith   PetscFunctionBegin;
9074482741eSBarry Smith   PetscValidHeaderSpecific(pc,PC_COOKIE,1);
9084482741eSBarry Smith   if (mat)  PetscValidPointer(mat,2);
9094482741eSBarry Smith   if (pmat) PetscValidPointer(pmat,3);
9104b9ad928SBarry Smith   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCRedundantGetOperators_C",(void (**)(void))&f);CHKERRQ(ierr);
9114b9ad928SBarry Smith   if (f) {
9124b9ad928SBarry Smith     ierr = (*f)(pc,mat,pmat);CHKERRQ(ierr);
9134b9ad928SBarry Smith   }
9144b9ad928SBarry Smith   PetscFunctionReturn(0);
9154b9ad928SBarry Smith }
9164b9ad928SBarry Smith 
9174b9ad928SBarry Smith /* -------------------------------------------------------------------------------------*/
91837a17b4dSBarry Smith /*MC
91937a17b4dSBarry Smith      PCREDUNDANT - Runs a preconditioner for the entire problem on each processor
92037a17b4dSBarry Smith 
92137a17b4dSBarry Smith 
92237a17b4dSBarry Smith      Options for the redundant preconditioners can be set with -redundant_pc_xxx
92337a17b4dSBarry Smith 
92437a17b4dSBarry Smith    Level: intermediate
92537a17b4dSBarry Smith 
92637a17b4dSBarry Smith 
92737a17b4dSBarry Smith .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedundantSetScatter(),
92837a17b4dSBarry Smith            PCRedundantGetPC(), PCRedundantGetOperators()
92937a17b4dSBarry Smith 
93037a17b4dSBarry Smith M*/
93137a17b4dSBarry Smith 
9324b9ad928SBarry Smith EXTERN_C_BEGIN
9334b9ad928SBarry Smith #undef __FUNCT__
9344b9ad928SBarry Smith #define __FUNCT__ "PCCreate_Redundant"
935dba47a55SKris Buschelman PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_Redundant(PC pc)
9364b9ad928SBarry Smith {
937dfbe8321SBarry Smith   PetscErrorCode ierr;
9384b9ad928SBarry Smith   PC_Redundant   *red;
939e060cb09SBarry Smith   const char     *prefix;
9403f457be1SHong Zhang   PetscInt       nsubcomm,np_subcomm,nleftover,i,j,color;
941f6341431SHong Zhang   PetscMPIInt    rank,size,subrank,*subsize,duprank;
942f6341431SHong Zhang   MPI_Comm       subcomm=0,dupcomm=0;
9433f457be1SHong Zhang 
9444b9ad928SBarry Smith   PetscFunctionBegin;
9454b9ad928SBarry Smith   ierr = PetscNew(PC_Redundant,&red);CHKERRQ(ierr);
94652e6d16bSBarry Smith   ierr = PetscLogObjectMemory(pc,sizeof(PC_Redundant));CHKERRQ(ierr);
9474b9ad928SBarry Smith   red->useparallelmat   = PETSC_TRUE;
9484b9ad928SBarry Smith 
9493f457be1SHong Zhang   ierr = MPI_Comm_rank(pc->comm,&rank);CHKERRQ(ierr);
9503f457be1SHong Zhang   ierr = MPI_Comm_size(pc->comm,&size);CHKERRQ(ierr);
9513f457be1SHong Zhang   nsubcomm = size;
9523f457be1SHong Zhang   ierr = PetscOptionsGetInt(PETSC_NULL,"-nsubcomm",&nsubcomm,PETSC_NULL);CHKERRQ(ierr);
9533f457be1SHong Zhang   if (nsubcomm > size) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Num of subcommunicators %D cannot be larger than MPI_Comm_size %D",nsubcomm,size);
9543f457be1SHong Zhang 
955f6341431SHong Zhang   /*--------------------------------------------------------------------------------------------------
956f6341431SHong Zhang     To avoid data scattering from subcomm back to original comm, we create subcomm by iteratively taking a
957f6341431SHong Zhang     processe into a subcomm.
958f6341431SHong Zhang     An example: size=4, nsubcomm=3
959f6341431SHong Zhang      pc->comm:
960f6341431SHong Zhang       rank:     [0]  [1]  [2]  [3]
961f6341431SHong Zhang       color:     0    1    2    0
962f6341431SHong Zhang 
963f6341431SHong Zhang      subcomm:
964f6341431SHong Zhang       subrank:  [0]  [0]  [0]  [1]
965f6341431SHong Zhang 
966f6341431SHong Zhang      dupcomm:
967f6341431SHong Zhang       duprank:  [0]  [2]  [3]  [1]
968f6341431SHong Zhang 
969f6341431SHong Zhang      Here, subcomm[color = 0] has subsize=2, owns process [0] and [3]
970f6341431SHong Zhang            subcomm[color = 1] has subsize=1, owns process [1]
971f6341431SHong Zhang            subcomm[color = 2] has subsize=1, owns process [2]
972f6341431SHong Zhang           dupcomm has same number of processes as pc->comm, and its duprank maps
973f6341431SHong Zhang           processes in subcomm contiguously into a 1d array:
974f6341431SHong Zhang             duprank: [0] [1]      [2]         [3]
975f6341431SHong Zhang             rank:    [0] [3]      [1]         [2]
976f6341431SHong Zhang                     subcomm[0] subcomm[1]  subcomm[2]
977f6341431SHong Zhang    ----------------------------------------------------------------------------------------*/
978f6341431SHong Zhang 
9793f457be1SHong Zhang   /* get size of each subcommunicators */
9803f457be1SHong Zhang   ierr = PetscMalloc((1+nsubcomm)*sizeof(PetscMPIInt),&subsize);CHKERRQ(ierr);
9813f457be1SHong Zhang   np_subcomm = size/nsubcomm;
9823f457be1SHong Zhang   nleftover  = size - nsubcomm*np_subcomm;
9833f457be1SHong Zhang   for (i=0; i<nsubcomm; i++){
9843f457be1SHong Zhang     subsize[i] = np_subcomm;
9853f457be1SHong Zhang     if (i<nleftover) subsize[i]++;
9863f457be1SHong Zhang   }
9873f457be1SHong Zhang 
9883f457be1SHong Zhang   /* find color for this proc */
9893f457be1SHong Zhang   color   = rank%nsubcomm;
9903f457be1SHong Zhang   subrank = rank/nsubcomm;
9913f457be1SHong Zhang 
9923f457be1SHong Zhang   ierr = MPI_Comm_split(pc->comm,color,subrank,&subcomm);CHKERRQ(ierr);
9933f457be1SHong Zhang   red->subcomm  = subcomm;
9943f457be1SHong Zhang   red->color    = color;
9953f457be1SHong Zhang   red->nsubcomm = nsubcomm;
9963f457be1SHong Zhang 
9973f457be1SHong Zhang   j = 0; duprank = 0;
9983f457be1SHong Zhang   for (i=0; i<nsubcomm; i++){
9993f457be1SHong Zhang     if (j == color){
10003f457be1SHong Zhang       duprank += subrank;
10013f457be1SHong Zhang       break;
10023f457be1SHong Zhang     }
10033f457be1SHong Zhang     duprank += subsize[i]; j++;
10043f457be1SHong Zhang   }
10053f457be1SHong Zhang 
1006f6341431SHong Zhang   /* create dupcomm with same size as comm, but its rank, duprank, maps subcomm's contiguously into dupcomm */
10073f457be1SHong Zhang   ierr = MPI_Comm_split(pc->comm,0,duprank,&dupcomm);CHKERRQ(ierr);
10083f457be1SHong Zhang   red->dupcomm = dupcomm;
10093f457be1SHong Zhang   ierr = PetscFree(subsize);CHKERRQ(ierr);
10103f457be1SHong Zhang 
10114b9ad928SBarry Smith   /* create the sequential PC that each processor has copy of */
10123f457be1SHong Zhang   ierr = PCCreate(subcomm,&red->pc);CHKERRQ(ierr);
10134b9ad928SBarry Smith   ierr = PCSetType(red->pc,PCLU);CHKERRQ(ierr);
10144b9ad928SBarry Smith   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
10154b9ad928SBarry Smith   ierr = PCSetOptionsPrefix(red->pc,prefix);CHKERRQ(ierr);
10164b9ad928SBarry Smith   ierr = PCAppendOptionsPrefix(red->pc,"redundant_");CHKERRQ(ierr);
10174b9ad928SBarry Smith 
10184b9ad928SBarry Smith   pc->ops->apply             = PCApply_Redundant;
10194b9ad928SBarry Smith   pc->ops->applytranspose    = 0;
10204b9ad928SBarry Smith   pc->ops->setup             = PCSetUp_Redundant;
10214b9ad928SBarry Smith   pc->ops->destroy           = PCDestroy_Redundant;
10224b9ad928SBarry Smith   pc->ops->setfromoptions    = PCSetFromOptions_Redundant;
10234b9ad928SBarry Smith   pc->ops->setuponblocks     = 0;
10244b9ad928SBarry Smith   pc->ops->view              = PCView_Redundant;
10254b9ad928SBarry Smith   pc->ops->applyrichardson   = 0;
10264b9ad928SBarry Smith 
10274b9ad928SBarry Smith   pc->data     = (void*)red;
10284b9ad928SBarry Smith 
10294b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantSetScatter_C","PCRedundantSetScatter_Redundant",
10304b9ad928SBarry Smith                     PCRedundantSetScatter_Redundant);CHKERRQ(ierr);
10314b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetPC_C","PCRedundantGetPC_Redundant",
10324b9ad928SBarry Smith                     PCRedundantGetPC_Redundant);CHKERRQ(ierr);
10334b9ad928SBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCRedundantGetOperators_C","PCRedundantGetOperators_Redundant",
10344b9ad928SBarry Smith                     PCRedundantGetOperators_Redundant);CHKERRQ(ierr);
10354b9ad928SBarry Smith 
10364b9ad928SBarry Smith   PetscFunctionReturn(0);
10374b9ad928SBarry Smith }
10384b9ad928SBarry Smith EXTERN_C_END
1039