xref: /petsc/src/ksp/pc/impls/redistribute/redistribute.c (revision bbead8a294046a48bbbe8fbcc40ad4dafe54ecae)
1 
2 /*
3   This file defines a "solve the problem redistributely on each subgroup of processor" preconditioner.
4 */
5 #include <private/pcimpl.h>     /*I "petscksp.h" I*/
6 #include <petscksp.h>
7 
8 typedef struct {
9   KSP          ksp;
10   Vec          x,b;
11   VecScatter   scatter;
12   IS           is;
13   PetscInt     dcnt,*drows;   /* these are the local rows that have only diagonal entry */
14   PetscScalar  *diag;
15   Vec          work;
16 } PC_Redistribute;
17 
18 #undef __FUNCT__
19 #define __FUNCT__ "PCView_Redistribute"
20 static PetscErrorCode PCView_Redistribute(PC pc,PetscViewer viewer)
21 {
22   PC_Redistribute *red = (PC_Redistribute*)pc->data;
23   PetscErrorCode  ierr;
24   PetscBool       iascii,isstring;
25   PetscInt        ncnt,N;
26 
27   PetscFunctionBegin;
28   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
29   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
30   if (iascii) {
31     ierr = MPI_Allreduce(&red->dcnt,&ncnt,1,MPIU_INT,MPI_SUM,((PetscObject)pc)->comm);CHKERRQ(ierr);
32     ierr = MatGetSize(pc->pmat,&N,PETSC_NULL);CHKERRQ(ierr);
33     ierr = PetscViewerASCIIPrintf(viewer,"    Number rows eliminated %D Percentage rows eliminated %g\n",ncnt,100.0*((PetscReal)ncnt)/((PetscReal)N));CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(viewer,"  Redistribute preconditioner: \n");CHKERRQ(ierr);
35     ierr = KSPView(red->ksp,viewer);CHKERRQ(ierr);
36   } else if (isstring) {
37     ierr = PetscViewerStringSPrintf(viewer," Redistribute preconditioner");CHKERRQ(ierr);
38     ierr = KSPView(red->ksp,viewer);CHKERRQ(ierr);
39   } else SETERRQ1(((PetscObject)pc)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for PC redistribute",((PetscObject)viewer)->type_name);
40   PetscFunctionReturn(0);
41 }
42 
43 #undef __FUNCT__
44 #define __FUNCT__ "PCSetUp_Redistribute"
45 static PetscErrorCode PCSetUp_Redistribute(PC pc)
46 {
47   PC_Redistribute   *red = (PC_Redistribute*)pc->data;
48   PetscErrorCode    ierr;
49   MPI_Comm          comm;
50   PetscInt          rstart,rend,i,nz,cnt,*rows,ncnt,dcnt,*drows;
51   PetscLayout       map,nmap;
52   PetscMPIInt       size,imdex,tag,n;
53   PetscInt          *source = PETSC_NULL;
54   PetscMPIInt       *nprocs = PETSC_NULL,nrecvs;
55   PetscInt          j,nsends;
56   PetscInt          *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
57   PetscInt          *rvalues,*svalues,recvtotal;
58   PetscMPIInt       *onodes1,*olengths1;
59   MPI_Request       *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
60   MPI_Status        recv_status,*send_status;
61   Vec               tvec,diag;
62   Mat               tmat;
63   const PetscScalar *d;
64 
65   PetscFunctionBegin;
66   if (pc->setupcalled) {
67     ierr = KSPGetOperators(red->ksp,PETSC_NULL,&tmat,PETSC_NULL);CHKERRQ(ierr);
68     ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_REUSE_MATRIX,&tmat);CHKERRQ(ierr);
69     ierr = KSPSetOperators(red->ksp,tmat,tmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
70   } else {
71     PetscInt NN;
72 
73     ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
74     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
75     ierr = PetscObjectGetNewTag((PetscObject)pc,&tag);CHKERRQ(ierr);
76 
77     /* count non-diagonal rows on process */
78     ierr = MatGetOwnershipRange(pc->mat,&rstart,&rend);CHKERRQ(ierr);
79     cnt  = 0;
80     for (i=rstart; i<rend; i++) {
81       ierr = MatGetRow(pc->mat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
82       if (nz > 1) cnt++;
83       ierr = MatRestoreRow(pc->mat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
84     }
85     ierr = PetscMalloc(cnt*sizeof(PetscInt),&rows);CHKERRQ(ierr);
86     ierr = PetscMalloc((rend - rstart - cnt)*sizeof(PetscInt),&drows);CHKERRQ(ierr);
87 
88     /* list non-diagonal rows on process */
89     cnt  = 0; dcnt = 0;
90     for (i=rstart; i<rend; i++) {
91       ierr = MatGetRow(pc->mat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
92       if (nz > 1) rows[cnt++] = i;
93       else drows[dcnt++] = i - rstart;
94       ierr = MatRestoreRow(pc->mat,i,&nz,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
95     }
96 
97     /* create PetscLayout for non-diagonal rows on each process */
98     ierr = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
99     ierr = PetscLayoutSetLocalSize(map,cnt);CHKERRQ(ierr);
100     ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
101     ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
102     rstart = map->rstart;
103     rend   = map->rend;
104 
105     /* create PetscLayout for load-balanced non-diagonal rows on each process */
106     ierr = PetscLayoutCreate(comm,&nmap);CHKERRQ(ierr);
107     ierr = MPI_Allreduce(&cnt,&ncnt,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
108     ierr = PetscLayoutSetSize(nmap,ncnt);CHKERRQ(ierr);
109     ierr = PetscLayoutSetBlockSize(nmap,1);CHKERRQ(ierr);
110     ierr = PetscLayoutSetUp(nmap);CHKERRQ(ierr);
111 
112     ierr = MatGetSize(pc->pmat,&NN,PETSC_NULL);CHKERRQ(ierr);
113     ierr = PetscInfo2(pc,"Number of diagonal rows eliminated %d, percentage eliminated %g\n",NN-ncnt,((PetscReal)(NN-ncnt))/((PetscReal)(NN)));
114     /*
115 	this code is taken from VecScatterCreate_PtoS()
116 	Determines what rows need to be moved where to
117 	load balance the non-diagonal rows
118     */
119     /*  count number of contributors to each processor */
120     ierr = PetscMalloc2(size,PetscMPIInt,&nprocs,cnt,PetscInt,&owner);CHKERRQ(ierr);
121     ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
122     j      = 0;
123     nsends = 0;
124     for (i=rstart; i<rend; i++) {
125       if (i < nmap->range[j]) j = 0;
126       for (; j<size; j++) {
127 	if (i < nmap->range[j+1]) {
128 	  if (!nprocs[j]++) nsends++;
129 	  owner[i-rstart] = j;
130 	  break;
131 	}
132       }
133     }
134     /* inform other processors of number of messages and max length*/
135     ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);CHKERRQ(ierr);
136     ierr = PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);CHKERRQ(ierr);
137     ierr = PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);CHKERRQ(ierr);
138     recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
139 
140     /* post receives:  rvalues - rows I will own; count - nu */
141     ierr = PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);CHKERRQ(ierr);
142     count  = 0;
143     for (i=0; i<nrecvs; i++) {
144       ierr  = MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);CHKERRQ(ierr);
145       count += olengths1[i];
146     }
147 
148     /* do sends:
149        1) starts[i] gives the starting index in svalues for stuff going to
150        the ith processor
151     */
152     ierr = PetscMalloc3(cnt,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size,PetscInt,&starts);CHKERRQ(ierr);
153     starts[0]  = 0;
154     for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
155     for (i=0; i<cnt; i++) {
156       svalues[starts[owner[i]]++] = rows[i];
157     }
158     for (i=0; i<cnt; i++) rows[i] = rows[i] - rstart;
159     red->drows = drows;
160     red->dcnt  = dcnt;
161     ierr = PetscFree(rows);CHKERRQ(ierr);
162 
163     starts[0] = 0;
164     for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
165     count = 0;
166     for (i=0; i<size; i++) {
167       if (nprocs[i]) {
168 	ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
169       }
170     }
171 
172     /*  wait on receives */
173     count  = nrecvs;
174     slen   = 0;
175     while (count) {
176       ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
177       /* unpack receives into our local space */
178       ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
179       slen += n;
180       count--;
181     }
182     if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
183 
184     ierr = ISCreateGeneral(comm,slen,rvalues,PETSC_COPY_VALUES,&red->is);CHKERRQ(ierr);
185 
186     /* free up all work space */
187     ierr = PetscFree(olengths1);CHKERRQ(ierr);
188     ierr = PetscFree(onodes1);CHKERRQ(ierr);
189     ierr = PetscFree3(rvalues,source,recv_waits);CHKERRQ(ierr);
190     ierr = PetscFree2(nprocs,owner);CHKERRQ(ierr);
191     if (nsends) {   /* wait on sends */
192       ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
193       ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
194       ierr = PetscFree(send_status);CHKERRQ(ierr);
195     }
196     ierr = PetscFree3(svalues,send_waits,starts);CHKERRQ(ierr);
197     ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
198     ierr = PetscLayoutDestroy(&nmap);CHKERRQ(ierr);
199 
200     ierr = VecCreateMPI(comm,slen,PETSC_DETERMINE,&red->b);CHKERRQ(ierr);
201     ierr = VecDuplicate(red->b,&red->x);CHKERRQ(ierr);
202     ierr = MatGetVecs(pc->pmat,&tvec,PETSC_NULL);CHKERRQ(ierr);
203     ierr = VecScatterCreate(tvec,red->is,red->b,PETSC_NULL,&red->scatter);CHKERRQ(ierr);
204     ierr = VecDestroy(&tvec);CHKERRQ(ierr);
205     ierr = MatGetSubMatrix(pc->pmat,red->is,red->is,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
206     ierr = KSPSetOperators(red->ksp,tmat,tmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
207     ierr = MatDestroy(&tmat);CHKERRQ(ierr);
208   }
209 
210   /* get diagonal portion of matrix */
211   ierr = PetscMalloc(red->dcnt*sizeof(PetscScalar),&red->diag);CHKERRQ(ierr);
212   ierr = MatGetVecs(pc->pmat,&diag,PETSC_NULL);CHKERRQ(ierr);
213   ierr = MatGetDiagonal(pc->pmat,diag);CHKERRQ(ierr);
214   ierr = VecGetArrayRead(diag,&d);CHKERRQ(ierr);
215   for (i=0; i<red->dcnt; i++) {
216     red->diag[i] = 1.0/d[red->drows[i]];
217   }
218   ierr = VecRestoreArrayRead(diag,&d);CHKERRQ(ierr);
219   ierr = VecDestroy(&diag);CHKERRQ(ierr);
220   ierr = KSPSetUp(red->ksp);CHKERRQ(ierr);
221   PetscFunctionReturn(0);
222 }
223 
224 #undef __FUNCT__
225 #define __FUNCT__ "PCApply_Redistribute"
226 static PetscErrorCode PCApply_Redistribute(PC pc,Vec b,Vec x)
227 {
228   PC_Redistribute   *red = (PC_Redistribute*)pc->data;
229   PetscErrorCode    ierr;
230   PetscInt          dcnt = red->dcnt,i;
231   const PetscInt    *drows = red->drows;
232   PetscScalar       *xwork;
233   const PetscScalar *bwork,*diag = red->diag;
234 
235   PetscFunctionBegin;
236   if (!red->work) {
237     ierr = VecDuplicate(b,&red->work);CHKERRQ(ierr);
238   }
239   /* compute the rows of solution that have diagonal entries only */
240   ierr = VecSet(x,0.0);CHKERRQ(ierr);         /* x = diag(A)^{-1} b */
241   ierr = VecGetArray(x,&xwork);CHKERRQ(ierr);
242   ierr = VecGetArrayRead(b,&bwork);CHKERRQ(ierr);
243   for (i=0; i<dcnt; i++) {
244     xwork[drows[i]] = diag[i]*bwork[drows[i]];
245   }
246   ierr = PetscLogFlops(dcnt);CHKERRQ(ierr);
247   ierr = VecRestoreArray(red->work,&xwork);CHKERRQ(ierr);
248   ierr = VecRestoreArrayRead(b,&bwork);CHKERRQ(ierr);
249   /* update the right hand side for the reduced system with diagonal rows (and corresponding columns) removed */
250   ierr = MatMult(pc->pmat,x,red->work);CHKERRQ(ierr);
251   ierr = VecAYPX(red->work,-1.0,b);CHKERRQ(ierr);   /* red->work = b - A x */
252 
253   ierr = VecScatterBegin(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
254   ierr = VecScatterEnd(red->scatter,red->work,red->b,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
255   ierr = KSPSolve(red->ksp,red->b,red->x);CHKERRQ(ierr);
256   ierr = VecScatterBegin(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
257   ierr = VecScatterEnd(red->scatter,red->x,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 
261 #undef __FUNCT__
262 #define __FUNCT__ "PCDestroy_Redistribute"
263 static PetscErrorCode PCDestroy_Redistribute(PC pc)
264 {
265   PC_Redistribute *red = (PC_Redistribute*)pc->data;
266   PetscErrorCode  ierr;
267 
268   PetscFunctionBegin;
269   ierr = VecScatterDestroy(&red->scatter);CHKERRQ(ierr);
270   ierr = ISDestroy(&red->is);CHKERRQ(ierr);
271   ierr = VecDestroy(&red->b);CHKERRQ(ierr);
272   ierr = VecDestroy(&red->x);CHKERRQ(ierr);
273   ierr = KSPDestroy(&red->ksp);CHKERRQ(ierr);
274   ierr = VecDestroy(&red->work);CHKERRQ(ierr);
275   ierr = PetscFree(red->drows);
276   ierr = PetscFree(red->diag);
277   ierr = PetscFree(pc->data);CHKERRQ(ierr);
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "PCSetFromOptions_Redistribute"
283 static PetscErrorCode PCSetFromOptions_Redistribute(PC pc)
284 {
285   PetscErrorCode  ierr;
286   PC_Redistribute *red = (PC_Redistribute*)pc->data;
287 
288   PetscFunctionBegin;
289   ierr = KSPSetFromOptions(red->ksp);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "PCRedistributeGetKSP"
295 /*@
296    PCRedistributeGetKSP - Gets the KSP created by the PCREDISTRIBUTE
297 
298    Not Collective
299 
300    Input Parameter:
301 .  pc - the preconditioner context
302 
303    Output Parameter:
304 .  innerksp - the inner KSP
305 
306    Level: advanced
307 
308 .keywords: PC, redistribute solve
309 @*/
310 PetscErrorCode  PCRedistributeGetKSP(PC pc,KSP *innerksp)
311 {
312   PC_Redistribute *red = (PC_Redistribute*)pc->data;
313 
314   PetscFunctionBegin;
315   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
316   PetscValidPointer(innerksp,2);
317   *innerksp = red->ksp;
318   PetscFunctionReturn(0);
319 }
320 
321 /* -------------------------------------------------------------------------------------*/
322 /*MC
323      PCREDISTRIBUTE - Redistributes a matrix for load balancing, removing the rows that only have a diagonal entry and then applys a KSP to that new matrix
324 
325      Options for the redistribute preconditioners can be set with -redistribute_ksp_xxx <values> and -redistribute_pc_xxx <values>
326 
327      Notes:  Usually run this with -ksp_type preonly
328 
329      If you have used MatZeroRows() to eliminate (for example, Dirichlet) boundary conditions for a symmetric problem then you can use, for example, -ksp_type preonly
330      -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc to take advantage of the symmetry.
331 
332    Level: intermediate
333 
334 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PCRedistributeGetKSP()
335 M*/
336 
337 EXTERN_C_BEGIN
338 #undef __FUNCT__
339 #define __FUNCT__ "PCCreate_Redistribute"
340 PetscErrorCode  PCCreate_Redistribute(PC pc)
341 {
342   PetscErrorCode  ierr;
343   PC_Redistribute *red;
344   const char      *prefix;
345 
346   PetscFunctionBegin;
347   ierr = PetscNewLog(pc,PC_Redistribute,&red);CHKERRQ(ierr);
348   pc->data            = (void*)red;
349 
350   pc->ops->apply           = PCApply_Redistribute;
351   pc->ops->applytranspose  = 0;
352   pc->ops->setup           = PCSetUp_Redistribute;
353   pc->ops->destroy         = PCDestroy_Redistribute;
354   pc->ops->setfromoptions  = PCSetFromOptions_Redistribute;
355   pc->ops->view            = PCView_Redistribute;
356 
357   ierr = KSPCreate(((PetscObject)pc)->comm,&red->ksp);CHKERRQ(ierr);
358   ierr = PetscObjectIncrementTabLevel((PetscObject)red->ksp,(PetscObject)pc,1);CHKERRQ(ierr);
359   ierr = PetscLogObjectParent(pc,red->ksp);CHKERRQ(ierr);
360   ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
361   ierr = KSPSetOptionsPrefix(red->ksp,prefix);CHKERRQ(ierr);
362   ierr = KSPAppendOptionsPrefix(red->ksp,"redistribute_");CHKERRQ(ierr);
363   PetscFunctionReturn(0);
364 }
365 EXTERN_C_END
366