
/*
  Defines a preconditioner that drops the rows (and corresponding columns) that
  contain only a diagonal entry, redistributes the remaining rows of the matrix
  evenly across the processes for load balance, and solves that smaller system
  with an inner KSP. A sketch of the algorithm follows the PC_Redistribute
  struct below.
*/
#include <private/pcimpl.h>     /*I "petscksp.h" I*/
#include <petscksp.h>

typedef struct {
  KSP          ksp;           /* inner solver for the redistributed system */
  Vec          x,b;           /* solution and right-hand side of the redistributed system */
  VecScatter   scatter;       /* maps between the original and the redistributed layout */
  IS           is;            /* global rows of the original matrix kept in the redistributed system */
  PetscInt     dcnt,*drows;   /* number of, and the local, rows that have only a diagonal entry */
  PetscScalar  *diag;         /* inverted diagonal entries of the eliminated rows */
  Vec          work;          /* scratch vector in the original layout */
} PC_Redistribute;
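
/*
   A sketch of what PCApply_Redistribute() below computes, using notation
   introduced here (not present in the original source): let d be the rows with
   only a diagonal entry and r the remaining rows. Since A_dr = 0 by
   construction, applying the preconditioner amounts to

      x_d      = D^{-1} b_d        with D = diag(A) restricted to the d rows
                                   (stored inverted in red->diag)
      A_rr x_r = b_r - A_rd x_d    solved, after redistribution, by red->ksp
*/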

#undef __FUNCT__
#define __FUNCT__ "PCView_Redistribute"
static PetscErrorCode PCView_Redistribute(PC pc,PetscViewer viewer)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;
  PetscErrorCode  ierr;
  PetscBool       iascii,isstring;
  PetscInt        ncnt,N;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
  if (iascii) {
    ierr = MPI_Allreduce(&red->dcnt,&ncnt,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
    ierr = MatGetSize(pc->pmat,&N,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    Number of rows eliminated %D Percentage of rows eliminated %g\n",ncnt,100.0*((PetscReal)ncnt)/((PetscReal)N));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  Redistribute preconditioner:\n");CHKERRQ(ierr);
    ierr = KSPView(red->ksp,viewer);CHKERRQ(ierr);
  } else if (isstring) {
    ierr = PetscViewerStringSPrintf(viewer," Redistribute preconditioner");CHKERRQ(ierr);
    ierr = KSPView(red->ksp,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PC redistribute",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetUp_Redistribute"
static PetscErrorCode PCSetUp_Redistribute(PC pc)
{
  PC_Redistribute   *red = (PC_Redistribute*)pc->data;
  PetscErrorCode    ierr;
  MPI_Comm          comm;
  PetscInt          rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows;
  PetscLayout       map,nmap;
  PetscMPIInt       size,imdex,tag,n;
  PetscInt          *source = PETSC_NULL;
  PetscMPIInt       *nprocs = PETSC_NULL,nrecvs;
  PetscInt          j,nsends;
  PetscInt          *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
  PetscInt          *rvalues,*svalues,recvtotal;
  PetscMPIInt       *onodes1,*olengths1;
  MPI_Request       *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
  MPI_Status        recv_status,*send_status;
  Vec               tvec,diag;
  Mat               tmat;
  const PetscScalar *d;

  PetscFunctionBegin;
  if (pc->setupcalled) {
    ierr = KSPGetOperators(red->ksp,PETSC_NULL,&tmat,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat);CHKERRQ(ierr);
    ierr = KSPSetOperators(red->ksp,tmat,tmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  } else {
    PetscInt NN;

    ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);

    /* count the local rows with more than just a diagonal entry; these rows are kept.
       Note pc->pmat is used throughout, consistent with the submatrix extracted below */
    ierr = MatGetOwnershipRange(pc->pmat,&rstart,&rend);CHKERRQ(ierr);
    cnt  = 0;
    for (i=rstart; i<rend; i++) {
      ierr = MatGetRow(pc->pmat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
      if (nz > 1) cnt++;
      ierr = MatRestoreRow(pc->pmat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
    }
    ierr = PetscMalloc(cnt*sizeof(PetscInt),&rows);CHKERRQ(ierr);
    ierr = PetscMalloc((rend - rstart - cnt)*sizeof(PetscInt),&drows);CHKERRQ(ierr);

    /* list the kept (non-diagonal) rows globally and the eliminated (diagonal-only) rows locally */
    cnt  = 0; dcnt = 0;
    for (i=rstart; i<rend; i++) {
      ierr = MatGetRow(pc->pmat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
      if (nz > 1) rows[cnt++] = i;
      else drows[dcnt++] = i - rstart;
      ierr = MatRestoreRow(pc->pmat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
    }

    /* create PetscLayout for the kept (non-diagonal) rows on each process */
    ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(map,cnt);CHKERRQ(ierr);
    ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
    rstart = map->rstart;
    rend   = map->rend;

    /* create PetscLayout for the load-balanced distribution of the kept rows */
    ierr = PetscLayoutCreate(comm,&nmap);CHKERRQ(ierr);
    ierr = MPI_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(nmap,ncnt);CHKERRQ(ierr);
    ierr = PetscLayoutSetBlockSize(nmap,1);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(nmap);CHKERRQ(ierr);

    ierr = MatGetSize(pc->pmat,&NN,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscInfo2(pc,"Number of diagonal rows eliminated %D, percentage eliminated %g\n",NN-ncnt,100.0*((PetscReal)(NN-ncnt))/((PetscReal)(NN)));CHKERRQ(ierr);
    /*
       This code is adapted from VecScatterCreate_PtoS(); it determines which
       kept rows must be moved to which process so that the kept rows are
       evenly balanced across all processes.
    */
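    /*
       A small worked example (hypothetical sizes, not from the source): with
       two processes holding cnt = 3 and cnt = 1 kept rows, ncnt = 4 and nmap
       assigns two rows to each process, so process 0 keeps its first two rows
       and sends its third to process 1.
    */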
    /*  count number of contributors to each processor */
    ierr = PetscMalloc2(size,PetscMPIInt,&nprocs,cnt,PetscInt,&owner);CHKERRQ(ierr);
    ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
    j      = 0;
    nsends = 0;
    for (i=rstart; i<rend; i++) {
      if (i < nmap->range[j]) j = 0;
      for (; j<size; j++) {
        if (i < nmap->range[j+1]) {
          if (!nprocs[j]++) nsends++;
          owner[i-rstart] = j;
          break;
        }
      }
    }
    /* inform other processors of number of messages and max length */
    ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);CHKERRQ(ierr);
    ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);CHKERRQ(ierr);
    ierr = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
    recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

    /* post receives: rvalues holds the rows this process will own */
    ierr = PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);CHKERRQ(ierr);
    count = 0;
    for (i=0; i<nrecvs; i++) {
      ierr   = MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
      count += olengths1[i];
    }

    /* do sends: starts[i] gives the starting index in svalues for the rows
       going to the ith processor */
    ierr = PetscMalloc3(cnt,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size,PetscInt,&starts);CHKERRQ(ierr);
    starts[0] = 0;
    for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
    for (i=0; i<cnt; i++) {
      svalues[starts[owner[i]]++] = rows[i];
    }
    red->drows = drows;
    red->dcnt  = dcnt;
    ierr = PetscFree(rows);CHKERRQ(ierr);

    starts[0] = 0;
    for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
    count = 0;
    for (i=0; i<size; i++) {
      if (nprocs[i]) {
        ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
      }
    }

    /*  wait on receives */
    count = nrecvs;
    slen  = 0;
    while (count) {
      ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
      /* unpack receives into our local space */
      ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
      slen += n;
      count--;
    }
    if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);

    ierr = ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is);CHKERRQ(ierr);

    /* free up all work space */
    ierr = PetscFree(olengths1);CHKERRQ(ierr);
    ierr = PetscFree(onodes1);CHKERRQ(ierr);
    ierr = PetscFree3(rvalues,source,recv_waits);CHKERRQ(ierr);
    ierr = PetscFree2(nprocs,owner);CHKERRQ(ierr);
    if (nsends) {   /* wait on sends */
      ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
      ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
      ierr = PetscFree(send_status);CHKERRQ(ierr);
    }
    ierr = PetscFree3(svalues,send_waits,starts);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&nmap);CHKERRQ(ierr);

    ierr = VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b);CHKERRQ(ierr);
    ierr = VecDuplicate(red->b,&red->x);CHKERRQ(ierr);
    ierr = MatGetVecs(pc->pmat,&tvec,PETSC_NULL);CHKERRQ(ierr);
    ierr = VecScatterCreate(tvec,red->is,red->b,PETSC_NULL,&red->scatter);CHKERRQ(ierr);
    ierr = VecDestroy(&tvec);CHKERRQ(ierr);
    ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
    ierr = KSPSetOperators(red->ksp,tmat,tmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    ierr = MatDestroy(&tmat);CHKERRQ(ierr);
  }

  /* store the inverted diagonal entries of the eliminated rows (freeing any copy from a previous setup) */
  ierr = PetscFree(red->diag);CHKERRQ(ierr);
  ierr = PetscMalloc(red->dcnt*sizeof(PetscScalar),&red->diag);CHKERRQ(ierr);
  ierr = MatGetVecs(pc->pmat,&diag,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetDiagonal(pc->pmat,diag);CHKERRQ(ierr);
  ierr = VecGetArrayRead(diag,&d);CHKERRQ(ierr);
  for (i=0; i<red->dcnt; i++) {
    red->diag[i] = 1.0/d[red->drows[i]];
  }
  ierr = VecRestoreArrayRead(diag,&d);CHKERRQ(ierr);
  ierr = VecDestroy(&diag);CHKERRQ(ierr);
  ierr = KSPSetUp(red->ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCApply_Redistribute"
static PetscErrorCode PCApply_Redistribute(PC pc,Vec b,Vec x)
{
  PC_Redistribute   *red = (PC_Redistribute*)pc->data;
  PetscErrorCode    ierr;
  PetscInt          dcnt = red->dcnt,i;
  const PetscInt    *drows = red->drows;
  PetscScalar       *xwork;
  const PetscScalar *bwork,*diag = red->diag;

  PetscFunctionBegin;
  if (!red->work) {
    ierr = VecDuplicate(b,&red->work);CHKERRQ(ierr);
  }
  /* compute the rows of the solution that have only a diagonal entry: x = diag(A)^{-1} b on those rows */
  ierr = VecSet(x,0.0);CHKERRQ(ierr);
  ierr = VecGetArray(x,&xwork);CHKERRQ(ierr);
  ierr = VecGetArrayRead(b,&bwork);CHKERRQ(ierr);
  for (i=0; i<dcnt; i++) {
    xwork[drows[i]] = diag[i]*bwork[drows[i]];
  }
  ierr = PetscLogFlops(dcnt);CHKERRQ(ierr);
  ierr = VecRestoreArray(x,&xwork);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(b,&bwork);CHKERRQ(ierr);
  /* update the right-hand side for the reduced system with the diagonal rows (and corresponding columns) removed */
  ierr = MatMult(pc->pmat,x,red->work);CHKERRQ(ierr);
  ierr = VecAYPX(red->work,-1.0,b);CHKERRQ(ierr);   /* red->work = b - A x */

  ierr = VecScatterBegin(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = KSPSolve(red->ksp,red->b,red->x);CHKERRQ(ierr);
  ierr = VecScatterBegin(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCDestroy_Redistribute"
static PetscErrorCode PCDestroy_Redistribute(PC pc)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = VecScatterDestroy(&red->scatter);CHKERRQ(ierr);
  ierr = ISDestroy(&red->is);CHKERRQ(ierr);
  ierr = VecDestroy(&red->b);CHKERRQ(ierr);
  ierr = VecDestroy(&red->x);CHKERRQ(ierr);
  ierr = KSPDestroy(&red->ksp);CHKERRQ(ierr);
  ierr = VecDestroy(&red->work);CHKERRQ(ierr);
  ierr = PetscFree(red->drows);CHKERRQ(ierr);
  ierr = PetscFree(red->diag);CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_Redistribute"
static PetscErrorCode PCSetFromOptions_Redistribute(PC pc)
{
  PetscErrorCode  ierr;
  PC_Redistribute *red = (PC_Redistribute*)pc->data;

  PetscFunctionBegin;
  ierr = KSPSetFromOptions(red->ksp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PCRedistributeGetKSP"
/*@
   PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE preconditioner

   Not Collective

   Input Parameter:
.  pc - the preconditioner context

   Output Parameter:
.  innerksp - the inner KSP

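   Example usage, a minimal sketch (error checking omitted):
.vb
      KSP inner;
      PCRedistributeGetKSP(pc,&inner);
      KSPSetTolerances(inner,1.e-10,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
.ve
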
   Level: advanced

.keywords: PC, redistribute solve
@*/
PetscErrorCode  PCRedistributeGetKSP(PC pc,KSP *innerksp)
{
  PC_Redistribute *red = (PC_Redistribute*)pc->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  PetscValidPointer(innerksp,2);
  *innerksp = red->ksp;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------------------*/
/*MC
     PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows that have only a diagonal entry, and then applies a KSP to the reduced matrix

     Options for the redistribute preconditioner can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values>

     Notes: Usually run this with -ksp_type preonly

     If you have used MatZeroRows() to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
     -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.

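     For example, a complete command line for the symmetric case above might look like the following
     (./ex1 is a placeholder executable name):
.vb
       ./ex1 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc
.ve
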
   Level: intermediate

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedistributeGetKSP()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PCCreate_Redistribute"
PetscErrorCode  PCCreate_Redistribute(PC pc)
{
  PetscErrorCode  ierr;
  PC_Redistribute *red;
  const char      *prefix;

  PetscFunctionBegin;
  ierr = PetscNewLog(pc,PC_Redistribute,&red);CHKERRQ(ierr);
  pc->data = (void*)red;

  pc->ops->apply          = PCApply_Redistribute;
  pc->ops->applytranspose = 0;
  pc->ops->setup          = PCSetUp_Redistribute;
  pc->ops->destroy        = PCDestroy_Redistribute;
  pc->ops->setfromoptions = PCSetFromOptions_Redistribute;
  pc->ops->view           = PCView_Redistribute;

  ierr = KSPCreate(((PetscObject)pc)->comm,&red->ksp);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(pc,red->ksp);CHKERRQ(ierr);
  ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
  ierr = KSPSetOptionsPrefix(red->ksp,prefix);CHKERRQ(ierr);
  ierr = KSPAppendOptionsPrefix(red->ksp,"redistribute_");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END