xref: /petsc/src/ksp/pc/impls/redistribute/redistribute.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*
3   This file defines a "solve the problem redistributely on each subgroup of processor" preconditioner.
4 */
5 #include <petsc-private/pcimpl.h>     /*I "petscksp.h" I*/
6 #include <petscksp.h>
7 
8 typedef struct {
9   KSP         ksp;
10   Vec         x,b;
11   VecScatter  scatter;
12   IS          is;
13   PetscInt    dcnt,*drows;    /* these are the local rows that have only diagonal entry */
14   PetscScalar *diag;
15   Vec         work;
16 } PC_Redistribute;
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "PCView_Redistribute"
20 static PetscErrorCode PCView_Redistribute(PC pc,PetscViewer viewer)
21 {
22   PC_Redistribute *red = (PC_Redistribute*)pc->data;
23   PetscErrorCode  ierr;
24   PetscBool       iascii,isstring;
25   PetscInt        ncnt,N;
26 
27   PetscFunctionBegin;
28   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
29   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
30   if (iascii) {
31     ierr = MPI_Allreduce(&red->dcnt,&ncnt,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
32     ierr = MatGetSize(pc->pmat,&N,PETSC_NULL);CHKERRQ(ierr);
33     ierr = PetscViewerASCIIPrintf(viewer,"    Number rows eliminated %D Percentage rows eliminated %g\n",ncnt,100.0*((PetscReal)ncnt)/((PetscReal)N));CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(viewer,"  Redistribute preconditioner: \n");CHKERRQ(ierr);
35     ierr = KSPView(red->ksp,viewer);CHKERRQ(ierr);
36   } else if (isstring) {
37     ierr = PetscViewerStringSPrintf(viewer," Redistribute preconditioner");CHKERRQ(ierr);
38     ierr = KSPView(red->ksp,viewer);CHKERRQ(ierr);
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "PCSetUp_Redistribute"
45 static PetscErrorCode PCSetUp_Redistribute(PC pc)
46 {
47   PC_Redistribute   *red = (PC_Redistribute*)pc->data;
48   PetscErrorCode    ierr;
49   MPI_Comm          comm;
50   PetscInt          rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows;
51   PetscLayout       map,nmap;
52   PetscMPIInt       size,imdex,tag,n;
53   PetscInt          *source = PETSC_NULL;
54   PetscMPIInt       *nprocs = PETSC_NULL,nrecvs;
55   PetscInt          j,nsends;
56   PetscInt          *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
57   PetscInt          *rvalues,*svalues,recvtotal;
58   PetscMPIInt       *onodes1,*olengths1;
59   MPI_Request       *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
60   MPI_Status        recv_status,*send_status;
61   Vec               tvec,diag;
62   Mat               tmat;
63   const PetscScalar *d;
64 
65   PetscFunctionBegin;
66   if (pc->setupcalled) {
67     ierr = KSPGetOperators(red->ksp,PETSC_NULL,&tmat,PETSC_NULL);CHKERRQ(ierr);
68     ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat);CHKERRQ(ierr);
69     ierr = KSPSetOperators(red->ksp,tmat,tmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
70   } else {
71     PetscInt NN;
72 
73     ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
74     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
75     ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
76 
77     /* count non-diagonal rows on process */
78     ierr = MatGetOwnershipRange(pc->mat,&rstart,&rend);CHKERRQ(ierr);
79     cnt  = 0;
80     for (i=rstart; i<rend; i++) {
81       ierr = MatGetRow(pc->mat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
82       if (nz > 1) cnt++;
83       ierr = MatRestoreRow(pc->mat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
84     }
85     ierr = PetscMalloc(cnt*sizeof(PetscInt),&rows);CHKERRQ(ierr);
86     ierr = PetscMalloc((rend - rstart - cnt)*sizeof(PetscInt),&drows);CHKERRQ(ierr);
87 
88     /* list non-diagonal rows on process */
89     cnt = 0; dcnt = 0;
90     for (i=rstart; i<rend; i++) {
91       ierr = MatGetRow(pc->mat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
92       if (nz > 1) rows[cnt++] = i;
93       else drows[dcnt++] = i - rstart;
94       ierr = MatRestoreRow(pc->mat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
95     }
96 
97     /* create PetscLayout for non-diagonal rows on each process */
98     ierr   = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
99     ierr   = PetscLayoutSetLocalSize(map,cnt);CHKERRQ(ierr);
100     ierr   = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
101     ierr   = PetscLayoutSetUp(map);CHKERRQ(ierr);
102     rstart = map->rstart;
103     rend   = map->rend;
104 
105     /* create PetscLayout for load-balanced non-diagonal rows on each process */
106     ierr = PetscLayoutCreate(comm,&nmap);CHKERRQ(ierr);
107     ierr = MPI_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
108     ierr = PetscLayoutSetSize(nmap,ncnt);CHKERRQ(ierr);
109     ierr = PetscLayoutSetBlockSize(nmap,1);CHKERRQ(ierr);
110     ierr = PetscLayoutSetUp(nmap);CHKERRQ(ierr);
111 
112     ierr = MatGetSize(pc->pmat,&NN,PETSC_NULL);CHKERRQ(ierr);
113     ierr = PetscInfo2(pc,"Number of diagonal rows eliminated %d, percentage eliminated %g\n",NN-ncnt,((PetscReal)(NN-ncnt))/((PetscReal)(NN)));CHKERRQ(ierr);
114     /*
115         this code is taken from VecScatterCreate_PtoS()
116         Determines what rows need to be moved where to
117         load balance the non-diagonal rows
118     */
119     /*  count number of contributors to each processor */
120     ierr   = PetscMalloc2(size,PetscMPIInt,&nprocs,cnt,PetscInt,&owner);CHKERRQ(ierr);
121     ierr   = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
122     j      = 0;
123     nsends = 0;
124     for (i=rstart; i<rend; i++) {
125       if (i < nmap->range[j]) j = 0;
126       for (; j<size; j++) {
127         if (i < nmap->range[j+1]) {
128           if (!nprocs[j]++) nsends++;
129           owner[i-rstart] = j;
130           break;
131         }
132       }
133     }
134     /* inform other processors of number of messages and max length*/
135     ierr      = PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);CHKERRQ(ierr);
136     ierr      = PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);CHKERRQ(ierr);
137     ierr      = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
138     recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
139 
140     /* post receives:  rvalues - rows I will own; count - nu */
141     ierr  = PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);CHKERRQ(ierr);
142     count = 0;
143     for (i=0; i<nrecvs; i++) {
144       ierr   = MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
145       count += olengths1[i];
146     }
147 
148     /* do sends:
149        1) starts[i] gives the starting index in svalues for stuff going to
150        the ith processor
151     */
152     ierr      = PetscMalloc3(cnt,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size,PetscInt,&starts);CHKERRQ(ierr);
153     starts[0] = 0;
154     for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
155     for (i=0; i<cnt; i++)  svalues[starts[owner[i]]++] = rows[i];
156     for (i=0; i<cnt; i++)  rows[i] = rows[i] - rstart;
157     red->drows = drows;
158     red->dcnt  = dcnt;
159     ierr       = PetscFree(rows);CHKERRQ(ierr);
160 
161     starts[0] = 0;
162     for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
163     count = 0;
164     for (i=0; i<size; i++) {
165       if (nprocs[i]) {
166         ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
167       }
168     }
169 
170     /*  wait on receives */
171     count = nrecvs;
172     slen  = 0;
173     while (count) {
174       ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
175       /* unpack receives into our local space */
176       ierr  = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
177       slen += n;
178       count--;
179     }
180     if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
181 
182     ierr = ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is);CHKERRQ(ierr);
183 
184     /* free up all work space */
185     ierr = PetscFree(olengths1);CHKERRQ(ierr);
186     ierr = PetscFree(onodes1);CHKERRQ(ierr);
187     ierr = PetscFree3(rvalues,source,recv_waits);CHKERRQ(ierr);
188     ierr = PetscFree2(nprocs,owner);CHKERRQ(ierr);
189     if (nsends) {   /* wait on sends */
190       ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
191       ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
192       ierr = PetscFree(send_status);CHKERRQ(ierr);
193     }
194     ierr = PetscFree3(svalues,send_waits,starts);CHKERRQ(ierr);
195     ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
196     ierr = PetscLayoutDestroy(&nmap);CHKERRQ(ierr);
197 
198     ierr = VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b);CHKERRQ(ierr);
199     ierr = VecDuplicate(red->b,&red->x);CHKERRQ(ierr);
200     ierr = MatGetVecs(pc->pmat,&tvec,PETSC_NULL);CHKERRQ(ierr);
201     ierr = VecScatterCreate(tvec,red->is,red->b,PETSC_NULL,&red->scatter);CHKERRQ(ierr);
202     ierr = VecDestroy(&tvec);CHKERRQ(ierr);
203     ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
204     ierr = KSPSetOperators(red->ksp,tmat,tmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
205     ierr = MatDestroy(&tmat);CHKERRQ(ierr);
206   }
207 
208   /* get diagonal portion of matrix */
209   ierr = PetscMalloc(red->dcnt*sizeof(PetscScalar),&red->diag);CHKERRQ(ierr);
210   ierr = MatGetVecs(pc->pmat,&diag,PETSC_NULL);CHKERRQ(ierr);
211   ierr = MatGetDiagonal(pc->pmat,diag);CHKERRQ(ierr);
212   ierr = VecGetArrayRead(diag,&d);CHKERRQ(ierr);
213   for (i=0; i<red->dcnt; i++) red->diag[i] = 1.0/d[red->drows[i]];
214   ierr = VecRestoreArrayRead(diag,&d);CHKERRQ(ierr);
215   ierr = VecDestroy(&diag);CHKERRQ(ierr);
216   ierr = KSPSetUp(red->ksp);CHKERRQ(ierr);
217   PetscFunctionReturn(0);
218 }
219 
220 #undef __FUNCT__
221 #define __FUNCT__ "PCApply_Redistribute"
222 static PetscErrorCode PCApply_Redistribute(PC pc,Vec b,Vec x)
223 {
224   PC_Redistribute   *red = (PC_Redistribute*)pc->data;
225   PetscErrorCode    ierr;
226   PetscInt          dcnt   = red->dcnt,i;
227   const PetscInt    *drows = red->drows;
228   PetscScalar       *xwork;
229   const PetscScalar *bwork,*diag = red->diag;
230 
231   PetscFunctionBegin;
232   if (!red->work) {
233     ierr = VecDuplicate(b,&red->work);CHKERRQ(ierr);
234   }
235   /* compute the rows of solution that have diagonal entries only */
236   ierr = VecSet(x,0.0);CHKERRQ(ierr);         /* x = diag(A)^{-1} b */
237   ierr = VecGetArray(x,&xwork);CHKERRQ(ierr);
238   ierr = VecGetArrayRead(b,&bwork);CHKERRQ(ierr);
239   for (i=0; i<dcnt; i++) xwork[drows[i]] = diag[i]*bwork[drows[i]];
240   ierr = PetscLogFlops(dcnt);CHKERRQ(ierr);
241   ierr = VecRestoreArray(red->work,&xwork);CHKERRQ(ierr);
242   ierr = VecRestoreArrayRead(b,&bwork);CHKERRQ(ierr);
243   /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
244   ierr = MatMult(pc->pmat,x,red->work);CHKERRQ(ierr);
245   ierr = VecAYPX(red->work,-1.0,b);CHKERRQ(ierr);   /* red->work = b - A x */
246 
247   ierr = VecScatterBegin(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
248   ierr = VecScatterEnd(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
249   ierr = KSPSolve(red->ksp,red->b,red->x);CHKERRQ(ierr);
250   ierr = VecScatterBegin(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
251   ierr = VecScatterEnd(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "PCDestroy_Redistribute"
257 static PetscErrorCode PCDestroy_Redistribute(PC pc)
258 {
259   PC_Redistribute *red = (PC_Redistribute*)pc->data;
260   PetscErrorCode  ierr;
261 
262   PetscFunctionBegin;
263   ierr = VecScatterDestroy(&red->scatter);CHKERRQ(ierr);
264   ierr = ISDestroy(&red->is);CHKERRQ(ierr);
265   ierr = VecDestroy(&red->b);CHKERRQ(ierr);
266   ierr = VecDestroy(&red->x);CHKERRQ(ierr);
267   ierr = KSPDestroy(&red->ksp);CHKERRQ(ierr);
268   ierr = VecDestroy(&red->work);CHKERRQ(ierr);
269   ierr = PetscFree(red->drows);CHKERRQ(ierr);
270   ierr = PetscFree(red->diag);CHKERRQ(ierr);
271   ierr = PetscFree(pc->data);CHKERRQ(ierr);
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "PCSetFromOptions_Redistribute"
277 static PetscErrorCode PCSetFromOptions_Redistribute(PC pc)
278 {
279   PetscErrorCode  ierr;
280   PC_Redistribute *red = (PC_Redistribute*)pc->data;
281 
282   PetscFunctionBegin;
283   ierr = KSPSetFromOptions(red->ksp);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #undef __FUNCT__
288 #define __FUNCT__ "PCRedistributeGetKSP"
289 /*@
290    PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE
291 
292    Not Collective
293 
294    Input Parameter:
295 .  pc - the preconditioner context
296 
297    Output Parameter:
298 .  innerksp - the inner KSP
299 
300    Level: advanced
301 
302 .keywords: PC, redistribute solve
303 @*/
304 PetscErrorCode  PCRedistributeGetKSP(PC pc,KSP *innerksp)
305 {
306   PC_Redistribute *red = (PC_Redistribute*)pc->data;
307 
308   PetscFunctionBegin;
309   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
310   PetscValidPointer(innerksp,2);
311   *innerksp = red->ksp;
312   PetscFunctionReturn(0);
313 }
314 
315 /* -------------------------------------------------------------------------------------*/
316 /*MC
317      PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows that only have a diagonal entry and then applys a KSP to that new matrix
318 
319      Options for the redistribute preconditioners can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values>
320 
321      Notes:  Usually run this with -ksp_type preonly
322 
323      If you have used MatZeroRows() to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
324      -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.
325 
326      This does NOT call a partitioner to reorder rows to lower communication; the ordering of the rows in the original matrix and redistributed matrix is the same.
327 
328      Developer Notes: Should add an option to this preconditioner to use a partitioner to redistribute the rows to lower communication.
329 
330    Level: intermediate
331 
332 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedistributeGetKSP()
333 M*/
334 
335 EXTERN_C_BEGIN
336 #undef __FUNCT__
337 #define __FUNCT__ "PCCreate_Redistribute"
338 PetscErrorCode  PCCreate_Redistribute(PC pc)
339 {
340   PetscErrorCode  ierr;
341   PC_Redistribute *red;
342   const char      *prefix;
343 
344   PetscFunctionBegin;
345   ierr     = PetscNewLog(pc,PC_Redistribute,&red);CHKERRQ(ierr);
346   pc->data = (void*)red;
347 
348   pc->ops->apply          = PCApply_Redistribute;
349   pc->ops->applytranspose = 0;
350   pc->ops->setup          = PCSetUp_Redistribute;
351   pc->ops->destroy        = PCDestroy_Redistribute;
352   pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
353   pc->ops->view           = PCView_Redistribute;
354 
355   ierr = KSPCreate(((PetscObject)pc)->comm,&red->ksp);CHKERRQ(ierr);
356   ierr = PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1);CHKERRQ(ierr);
357   ierr = PetscLogObjectParent(pc,red->ksp);CHKERRQ(ierr);
358   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
359   ierr = KSPSetOptionsPrefix(red->ksp,prefix);CHKERRQ(ierr);
360   ierr = KSPAppendOptionsPrefix(red->ksp,"redistribute_");CHKERRQ(ierr);
361   PetscFunctionReturn(0);
362 }
363 EXTERN_C_END
364