xref: /petsc/src/vec/is/ao/impls/memscalable/aomemscalable.c (revision 1447629fc24590da3991bec7ed146bff2c847561)
1*1447629fSBarry Smith 
2*1447629fSBarry Smith /*
3*1447629fSBarry Smith     The memory scalable AO application ordering routines. These store the
4*1447629fSBarry Smith   local orderings on each processor.
5*1447629fSBarry Smith */
6*1447629fSBarry Smith 
7*1447629fSBarry Smith #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/
8*1447629fSBarry Smith 
9*1447629fSBarry Smith typedef struct {
10*1447629fSBarry Smith   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
11*1447629fSBarry Smith   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
12*1447629fSBarry Smith   PetscLayout map;         /* determines the local sizes of ao */
13*1447629fSBarry Smith } AO_MemoryScalable;
14*1447629fSBarry Smith 
15*1447629fSBarry Smith /*
16*1447629fSBarry Smith        All processors have the same data so processor 1 prints it
17*1447629fSBarry Smith */
18*1447629fSBarry Smith #undef __FUNCT__
19*1447629fSBarry Smith #define __FUNCT__ "AOView_MemoryScalable"
20*1447629fSBarry Smith PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
21*1447629fSBarry Smith {
22*1447629fSBarry Smith   PetscErrorCode    ierr;
23*1447629fSBarry Smith   PetscMPIInt       rank,size;
24*1447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
25*1447629fSBarry Smith   PetscBool         iascii;
26*1447629fSBarry Smith   PetscMPIInt       tag_app,tag_petsc;
27*1447629fSBarry Smith   PetscLayout       map = aomems->map;
28*1447629fSBarry Smith   PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
29*1447629fSBarry Smith   MPI_Status        status;
30*1447629fSBarry Smith 
31*1447629fSBarry Smith   PetscFunctionBegin;
32*1447629fSBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
33*1447629fSBarry Smith   if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);
34*1447629fSBarry Smith 
35*1447629fSBarry Smith   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);CHKERRQ(ierr);
36*1447629fSBarry Smith   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);CHKERRQ(ierr);
37*1447629fSBarry Smith 
38*1447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_app);CHKERRQ(ierr);
39*1447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);CHKERRQ(ierr);
40*1447629fSBarry Smith 
41*1447629fSBarry Smith   if (!rank) {
42*1447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);CHKERRQ(ierr);
43*1447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");CHKERRQ(ierr);
44*1447629fSBarry Smith 
45*1447629fSBarry Smith     ierr = PetscMalloc2(map->N,PetscInt,&app,map->N,PetscInt,&petsc);CHKERRQ(ierr);
46*1447629fSBarry Smith     len  = map->n;
47*1447629fSBarry Smith     /* print local AO */
48*1447629fSBarry Smith     ierr = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);CHKERRQ(ierr);
49*1447629fSBarry Smith     for (i=0; i<len; i++) {
50*1447629fSBarry Smith       ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);CHKERRQ(ierr);
51*1447629fSBarry Smith     }
52*1447629fSBarry Smith 
53*1447629fSBarry Smith     /* recv and print off-processor's AO */
54*1447629fSBarry Smith     for (i=1; i<size; i++) {
55*1447629fSBarry Smith       len       = map->range[i+1] - map->range[i];
56*1447629fSBarry Smith       app_loc   = app  + map->range[i];
57*1447629fSBarry Smith       petsc_loc = petsc+ map->range[i];
58*1447629fSBarry Smith       ierr      = MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
59*1447629fSBarry Smith       ierr      = MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);CHKERRQ(ierr);
60*1447629fSBarry Smith       ierr      = PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);CHKERRQ(ierr);
61*1447629fSBarry Smith       for (j=0; j<len; j++) {
62*1447629fSBarry Smith         ierr = PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);CHKERRQ(ierr);
63*1447629fSBarry Smith       }
64*1447629fSBarry Smith     }
65*1447629fSBarry Smith     ierr = PetscFree2(app,petsc);CHKERRQ(ierr);
66*1447629fSBarry Smith 
67*1447629fSBarry Smith   } else {
68*1447629fSBarry Smith     /* send values */
69*1447629fSBarry Smith     ierr = MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
70*1447629fSBarry Smith     ierr = MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));CHKERRQ(ierr);
71*1447629fSBarry Smith   }
72*1447629fSBarry Smith   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
73*1447629fSBarry Smith   PetscFunctionReturn(0);
74*1447629fSBarry Smith }
75*1447629fSBarry Smith 
76*1447629fSBarry Smith #undef __FUNCT__
77*1447629fSBarry Smith #define __FUNCT__ "AODestroy_MemoryScalable"
78*1447629fSBarry Smith PetscErrorCode AODestroy_MemoryScalable(AO ao)
79*1447629fSBarry Smith {
80*1447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
81*1447629fSBarry Smith   PetscErrorCode    ierr;
82*1447629fSBarry Smith 
83*1447629fSBarry Smith   PetscFunctionBegin;
84*1447629fSBarry Smith   ierr = PetscFree2(aomems->app_loc,aomems->petsc_loc);CHKERRQ(ierr);
85*1447629fSBarry Smith   ierr = PetscLayoutDestroy(&aomems->map);CHKERRQ(ierr);
86*1447629fSBarry Smith   ierr = PetscFree(aomems);CHKERRQ(ierr);
87*1447629fSBarry Smith   PetscFunctionReturn(0);
88*1447629fSBarry Smith }
89*1447629fSBarry Smith 
90*1447629fSBarry Smith /*
91*1447629fSBarry Smith    Input Parameters:
92*1447629fSBarry Smith +   ao - the application ordering context
93*1447629fSBarry Smith .   n  - the number of integers in ia[]
94*1447629fSBarry Smith .   ia - the integers; these are replaced with their mapped value
95*1447629fSBarry Smith -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"
96*1447629fSBarry Smith 
97*1447629fSBarry Smith    Output Parameter:
98*1447629fSBarry Smith .   ia - the mapped interges
99*1447629fSBarry Smith  */
100*1447629fSBarry Smith #undef __FUNCT__
101*1447629fSBarry Smith #define __FUNCT__ "AOMap_MemoryScalable_private"
102*1447629fSBarry Smith PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc)
103*1447629fSBarry Smith {
104*1447629fSBarry Smith   PetscErrorCode    ierr;
105*1447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
106*1447629fSBarry Smith   MPI_Comm          comm;
107*1447629fSBarry Smith   PetscMPIInt       rank,size,tag1,tag2;
108*1447629fSBarry Smith   PetscInt          *owner,*start,*nprocs,nsends,nreceives;
109*1447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
110*1447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
111*1447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
112*1447629fSBarry Smith   MPI_Status        recv_status;
113*1447629fSBarry Smith   PetscMPIInt       nindices,source,widx;
114*1447629fSBarry Smith   PetscInt          *rbuf,*sbuf;
115*1447629fSBarry Smith   MPI_Status        *send_status,*send_status2;
116*1447629fSBarry Smith 
117*1447629fSBarry Smith   PetscFunctionBegin;
118*1447629fSBarry Smith   ierr = PetscObjectGetComm((PetscObject)ao,&comm);CHKERRQ(ierr);
119*1447629fSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
120*1447629fSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
121*1447629fSBarry Smith 
122*1447629fSBarry Smith   /*  first count number of contributors to each processor */
123*1447629fSBarry Smith   ierr = PetscMalloc2(2*size,PetscInt,&nprocs,size,PetscInt,&start);CHKERRQ(ierr);
124*1447629fSBarry Smith   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
125*1447629fSBarry Smith   ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr);
126*1447629fSBarry Smith   ierr = PetscMemzero(owner,n*sizeof(PetscInt));CHKERRQ(ierr);
127*1447629fSBarry Smith 
128*1447629fSBarry Smith   j       = 0;
129*1447629fSBarry Smith   lastidx = -1;
130*1447629fSBarry Smith   for (i=0; i<n; i++) {
131*1447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
132*1447629fSBarry Smith     if (lastidx > (idx = ia[i])) j = 0;
133*1447629fSBarry Smith     lastidx = idx;
134*1447629fSBarry Smith     for (; j<size; j++) {
135*1447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
136*1447629fSBarry Smith         nprocs[2*j]++;     /* num of indices to be sent */
137*1447629fSBarry Smith         nprocs[2*j+1] = 1; /* send to proc[j] */
138*1447629fSBarry Smith         owner[i]      = j;
139*1447629fSBarry Smith         break;
140*1447629fSBarry Smith       }
141*1447629fSBarry Smith     }
142*1447629fSBarry Smith   }
143*1447629fSBarry Smith   nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */
144*1447629fSBarry Smith   nsends        = 0;
145*1447629fSBarry Smith   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
146*1447629fSBarry Smith 
147*1447629fSBarry Smith   /* inform other processors of number of messages and max length*/
148*1447629fSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
149*1447629fSBarry Smith 
150*1447629fSBarry Smith   /* allocate arrays */
151*1447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag1);CHKERRQ(ierr);
152*1447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag2);CHKERRQ(ierr);
153*1447629fSBarry Smith 
154*1447629fSBarry Smith   ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr);
155*1447629fSBarry Smith   ierr = PetscMalloc2(nsends*nmax,PetscInt,&rindices2,nsends,MPI_Request,&recv_waits2);CHKERRQ(ierr);
156*1447629fSBarry Smith 
157*1447629fSBarry Smith   ierr = PetscMalloc3(n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr);
158*1447629fSBarry Smith   ierr = PetscMalloc3(n,PetscInt,&sindices2,nreceives,MPI_Request,&send_waits2,nreceives,MPI_Status,&send_status2);CHKERRQ(ierr);
159*1447629fSBarry Smith 
160*1447629fSBarry Smith   /* post 1st receives: receive others requests
161*1447629fSBarry Smith      since we don't know how long each individual message is we
162*1447629fSBarry Smith      allocate the largest needed buffer for each receive. Potentially
163*1447629fSBarry Smith      this is a lot of wasted space.
164*1447629fSBarry Smith   */
165*1447629fSBarry Smith   for (i=0,count=0; i<nreceives; i++) {
166*1447629fSBarry Smith     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);CHKERRQ(ierr);
167*1447629fSBarry Smith   }
168*1447629fSBarry Smith 
169*1447629fSBarry Smith   /* do 1st sends:
170*1447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
171*1447629fSBarry Smith          the ith processor
172*1447629fSBarry Smith   */
173*1447629fSBarry Smith   start[0] = 0;
174*1447629fSBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
175*1447629fSBarry Smith   for (i=0; i<n; i++) {
176*1447629fSBarry Smith     j = owner[i];
177*1447629fSBarry Smith     if (j != rank) {
178*1447629fSBarry Smith       sindices[start[j]++]  = ia[i];
179*1447629fSBarry Smith     } else { /* compute my own map */
180*1447629fSBarry Smith       if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) {
181*1447629fSBarry Smith         ia[i] = maploc[ia[i]-owners[rank]];
182*1447629fSBarry Smith       } else {
183*1447629fSBarry Smith         ia[i] = -1;  /* ia[i] is not in the range of 0 and N-1, maps it to -1 */
184*1447629fSBarry Smith       }
185*1447629fSBarry Smith     }
186*1447629fSBarry Smith   }
187*1447629fSBarry Smith 
188*1447629fSBarry Smith   start[0] = 0;
189*1447629fSBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
190*1447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
191*1447629fSBarry Smith     if (nprocs[2*i+1]) {
192*1447629fSBarry Smith       /* send my request to others */
193*1447629fSBarry Smith       ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag1,comm,send_waits+count);CHKERRQ(ierr);
194*1447629fSBarry Smith       /* post receive for the answer of my request */
195*1447629fSBarry Smith       ierr = MPI_Irecv(sindices2+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);CHKERRQ(ierr);
196*1447629fSBarry Smith       count++;
197*1447629fSBarry Smith     }
198*1447629fSBarry Smith   }
199*1447629fSBarry Smith   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
200*1447629fSBarry Smith 
201*1447629fSBarry Smith   /* wait on 1st sends */
202*1447629fSBarry Smith   if (nsends) {
203*1447629fSBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
204*1447629fSBarry Smith   }
205*1447629fSBarry Smith 
206*1447629fSBarry Smith   /* 1st recvs: other's requests */
207*1447629fSBarry Smith   for (j=0; j< nreceives; j++) {
208*1447629fSBarry Smith     ierr   = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr); /* idx: index of handle for operation that completed */
209*1447629fSBarry Smith     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
210*1447629fSBarry Smith     rbuf   = rindices+nmax*widx; /* global index */
211*1447629fSBarry Smith     source = recv_status.MPI_SOURCE;
212*1447629fSBarry Smith 
213*1447629fSBarry Smith     /* compute mapping */
214*1447629fSBarry Smith     sbuf = rbuf;
215*1447629fSBarry Smith     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];
216*1447629fSBarry Smith 
217*1447629fSBarry Smith     /* send mapping back to the sender */
218*1447629fSBarry Smith     ierr = MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);CHKERRQ(ierr);
219*1447629fSBarry Smith   }
220*1447629fSBarry Smith 
221*1447629fSBarry Smith   /* wait on 2nd sends */
222*1447629fSBarry Smith   if (nreceives) {
223*1447629fSBarry Smith     ierr = MPI_Waitall(nreceives,send_waits2,send_status2);CHKERRQ(ierr);
224*1447629fSBarry Smith   }
225*1447629fSBarry Smith 
226*1447629fSBarry Smith   /* 2nd recvs: for the answer of my request */
227*1447629fSBarry Smith   for (j=0; j< nsends; j++) {
228*1447629fSBarry Smith     ierr   = MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);CHKERRQ(ierr);
229*1447629fSBarry Smith     ierr   = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
230*1447629fSBarry Smith     source = recv_status.MPI_SOURCE;
231*1447629fSBarry Smith     /* pack output ia[] */
232*1447629fSBarry Smith     rbuf  = sindices2+start[source];
233*1447629fSBarry Smith     count = 0;
234*1447629fSBarry Smith     for (i=0; i<n; i++) {
235*1447629fSBarry Smith       if (source == owner[i]) ia[i] = rbuf[count++];
236*1447629fSBarry Smith     }
237*1447629fSBarry Smith   }
238*1447629fSBarry Smith 
239*1447629fSBarry Smith   /* free arrays */
240*1447629fSBarry Smith   ierr = PetscFree2(nprocs,start);CHKERRQ(ierr);
241*1447629fSBarry Smith   ierr = PetscFree(owner);CHKERRQ(ierr);
242*1447629fSBarry Smith   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
243*1447629fSBarry Smith   ierr = PetscFree2(rindices2,recv_waits2);CHKERRQ(ierr);
244*1447629fSBarry Smith   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
245*1447629fSBarry Smith   ierr = PetscFree3(sindices2,send_waits2,send_status2);CHKERRQ(ierr);
246*1447629fSBarry Smith   PetscFunctionReturn(0);
247*1447629fSBarry Smith }
248*1447629fSBarry Smith 
249*1447629fSBarry Smith #undef __FUNCT__
250*1447629fSBarry Smith #define __FUNCT__ "AOPetscToApplication_MemoryScalable"
251*1447629fSBarry Smith PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
252*1447629fSBarry Smith {
253*1447629fSBarry Smith   PetscErrorCode    ierr;
254*1447629fSBarry Smith   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
255*1447629fSBarry Smith   PetscInt          *app_loc = aomems->app_loc;
256*1447629fSBarry Smith 
257*1447629fSBarry Smith   PetscFunctionBegin;
258*1447629fSBarry Smith   ierr = AOMap_MemoryScalable_private(ao,n,ia,app_loc);CHKERRQ(ierr);
259*1447629fSBarry Smith   PetscFunctionReturn(0);
260*1447629fSBarry Smith }
261*1447629fSBarry Smith 
262*1447629fSBarry Smith #undef __FUNCT__
263*1447629fSBarry Smith #define __FUNCT__ "AOApplicationToPetsc_MemoryScalable"
264*1447629fSBarry Smith PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
265*1447629fSBarry Smith {
266*1447629fSBarry Smith   PetscErrorCode    ierr;
267*1447629fSBarry Smith   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
268*1447629fSBarry Smith   PetscInt          *petsc_loc = aomems->petsc_loc;
269*1447629fSBarry Smith 
270*1447629fSBarry Smith   PetscFunctionBegin;
271*1447629fSBarry Smith   ierr = AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);CHKERRQ(ierr);
272*1447629fSBarry Smith   PetscFunctionReturn(0);
273*1447629fSBarry Smith }
274*1447629fSBarry Smith 
275*1447629fSBarry Smith static struct _AOOps AOOps_MemoryScalable = {
276*1447629fSBarry Smith   AOView_MemoryScalable,
277*1447629fSBarry Smith   AODestroy_MemoryScalable,
278*1447629fSBarry Smith   AOPetscToApplication_MemoryScalable,
279*1447629fSBarry Smith   AOApplicationToPetsc_MemoryScalable,
280*1447629fSBarry Smith   0,
281*1447629fSBarry Smith   0,
282*1447629fSBarry Smith   0,
283*1447629fSBarry Smith   0
284*1447629fSBarry Smith };
285*1447629fSBarry Smith 
286*1447629fSBarry Smith #undef __FUNCT__
287*1447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalable_private"
288*1447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
289*1447629fSBarry Smith {
290*1447629fSBarry Smith   PetscErrorCode    ierr;
291*1447629fSBarry Smith   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
292*1447629fSBarry Smith   PetscLayout       map     = aomems->map;
293*1447629fSBarry Smith   PetscInt          n_local = map->n,i,j;
294*1447629fSBarry Smith   PetscMPIInt       rank,size,tag;
295*1447629fSBarry Smith   PetscInt          *owner,*start,*nprocs,nsends,nreceives;
296*1447629fSBarry Smith   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
297*1447629fSBarry Smith   PetscInt          *owners = aomems->map->range;
298*1447629fSBarry Smith   MPI_Request       *send_waits,*recv_waits;
299*1447629fSBarry Smith   MPI_Status        recv_status;
300*1447629fSBarry Smith   PetscMPIInt       nindices,widx;
301*1447629fSBarry Smith   PetscInt          *rbuf;
302*1447629fSBarry Smith   PetscInt          n=napp,ip,ia;
303*1447629fSBarry Smith   MPI_Status        *send_status;
304*1447629fSBarry Smith 
305*1447629fSBarry Smith   PetscFunctionBegin;
306*1447629fSBarry Smith   ierr = PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
307*1447629fSBarry Smith 
308*1447629fSBarry Smith   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
309*1447629fSBarry Smith   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
310*1447629fSBarry Smith 
311*1447629fSBarry Smith   /*  first count number of contributors (of from_array[]) to each processor */
312*1447629fSBarry Smith   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
313*1447629fSBarry Smith   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
314*1447629fSBarry Smith   ierr = PetscMalloc(n*sizeof(PetscInt),&owner);CHKERRQ(ierr);
315*1447629fSBarry Smith 
316*1447629fSBarry Smith   j       = 0;
317*1447629fSBarry Smith   lastidx = -1;
318*1447629fSBarry Smith   for (i=0; i<n; i++) {
319*1447629fSBarry Smith     /* if indices are NOT locally sorted, need to start search at the beginning */
320*1447629fSBarry Smith     if (lastidx > (idx = from_array[i])) j = 0;
321*1447629fSBarry Smith     lastidx = idx;
322*1447629fSBarry Smith     for (; j<size; j++) {
323*1447629fSBarry Smith       if (idx >= owners[j] && idx < owners[j+1]) {
324*1447629fSBarry Smith         nprocs[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
325*1447629fSBarry Smith         nprocs[2*j+1] = 1; /* send to proc[j] */
326*1447629fSBarry Smith         owner[i]      = j;
327*1447629fSBarry Smith         break;
328*1447629fSBarry Smith       }
329*1447629fSBarry Smith     }
330*1447629fSBarry Smith   }
331*1447629fSBarry Smith   nprocs[2*rank]=nprocs[2*rank+1]=0; /* do not receive from self! */
332*1447629fSBarry Smith   nsends        = 0;
333*1447629fSBarry Smith   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
334*1447629fSBarry Smith 
335*1447629fSBarry Smith   /* inform other processors of number of messages and max length*/
336*1447629fSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
337*1447629fSBarry Smith 
338*1447629fSBarry Smith   /* allocate arrays */
339*1447629fSBarry Smith   ierr = PetscObjectGetNewTag((PetscObject)ao,&tag);CHKERRQ(ierr);
340*1447629fSBarry Smith   ierr = PetscMalloc2(nreceives*nmax,PetscInt,&rindices,nreceives,MPI_Request,&recv_waits);CHKERRQ(ierr);
341*1447629fSBarry Smith   ierr = PetscMalloc3(2*n,PetscInt,&sindices,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);CHKERRQ(ierr);
342*1447629fSBarry Smith   ierr = PetscMalloc(size*sizeof(PetscInt),&start);CHKERRQ(ierr);
343*1447629fSBarry Smith 
344*1447629fSBarry Smith   /* post receives: */
345*1447629fSBarry Smith   for (i=0; i<nreceives; i++) {
346*1447629fSBarry Smith     ierr = MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
347*1447629fSBarry Smith   }
348*1447629fSBarry Smith 
349*1447629fSBarry Smith   /* do sends:
350*1447629fSBarry Smith       1) starts[i] gives the starting index in svalues for stuff going to
351*1447629fSBarry Smith          the ith processor
352*1447629fSBarry Smith   */
353*1447629fSBarry Smith   start[0] = 0;
354*1447629fSBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
355*1447629fSBarry Smith   for (i=0; i<n; i++) {
356*1447629fSBarry Smith     j = owner[i];
357*1447629fSBarry Smith     if (j != rank) {
358*1447629fSBarry Smith       ip                   = from_array[i];
359*1447629fSBarry Smith       ia                   = to_array[i];
360*1447629fSBarry Smith       sindices[start[j]++] = ip;
361*1447629fSBarry Smith       sindices[start[j]++] = ia;
362*1447629fSBarry Smith     } else { /* compute my own map */
363*1447629fSBarry Smith       ip            = from_array[i] - owners[rank];
364*1447629fSBarry Smith       ia            = to_array[i];
365*1447629fSBarry Smith       aomap_loc[ip] = ia;
366*1447629fSBarry Smith     }
367*1447629fSBarry Smith   }
368*1447629fSBarry Smith 
369*1447629fSBarry Smith   start[0] = 0;
370*1447629fSBarry Smith   for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];
371*1447629fSBarry Smith   for (i=0,count=0; i<size; i++) {
372*1447629fSBarry Smith     if (nprocs[2*i+1]) {
373*1447629fSBarry Smith       ierr = MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count);CHKERRQ(ierr);
374*1447629fSBarry Smith       count++;
375*1447629fSBarry Smith     }
376*1447629fSBarry Smith   }
377*1447629fSBarry Smith   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);
378*1447629fSBarry Smith 
379*1447629fSBarry Smith   /* wait on sends */
380*1447629fSBarry Smith   if (nsends) {
381*1447629fSBarry Smith     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
382*1447629fSBarry Smith   }
383*1447629fSBarry Smith 
384*1447629fSBarry Smith   /* recvs */
385*1447629fSBarry Smith   count=0;
386*1447629fSBarry Smith   for (j= nreceives; j>0; j--) {
387*1447629fSBarry Smith     ierr = MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);CHKERRQ(ierr);
388*1447629fSBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_INT,&nindices);CHKERRQ(ierr);
389*1447629fSBarry Smith     rbuf = rindices+nmax*widx; /* global index */
390*1447629fSBarry Smith 
391*1447629fSBarry Smith     /* compute local mapping */
392*1447629fSBarry Smith     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
393*1447629fSBarry Smith       ip            = rbuf[i] - owners[rank]; /* local index */
394*1447629fSBarry Smith       ia            = rbuf[i+1];
395*1447629fSBarry Smith       aomap_loc[ip] = ia;
396*1447629fSBarry Smith     }
397*1447629fSBarry Smith     count++;
398*1447629fSBarry Smith   }
399*1447629fSBarry Smith 
400*1447629fSBarry Smith   ierr = PetscFree(start);CHKERRQ(ierr);
401*1447629fSBarry Smith   ierr = PetscFree3(sindices,send_waits,send_status);CHKERRQ(ierr);
402*1447629fSBarry Smith   ierr = PetscFree2(rindices,recv_waits);CHKERRQ(ierr);
403*1447629fSBarry Smith   ierr = PetscFree(owner);CHKERRQ(ierr);
404*1447629fSBarry Smith   ierr = PetscFree(nprocs);CHKERRQ(ierr);
405*1447629fSBarry Smith   PetscFunctionReturn(0);
406*1447629fSBarry Smith }
407*1447629fSBarry Smith 
408*1447629fSBarry Smith EXTERN_C_BEGIN
409*1447629fSBarry Smith #undef __FUNCT__
410*1447629fSBarry Smith #define __FUNCT__ "AOCreate_MemoryScalable"
411*1447629fSBarry Smith PetscErrorCode AOCreate_MemoryScalable(AO ao)
412*1447629fSBarry Smith {
413*1447629fSBarry Smith   PetscErrorCode    ierr;
414*1447629fSBarry Smith   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
415*1447629fSBarry Smith   const PetscInt    *mypetsc,*myapp;
416*1447629fSBarry Smith   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
417*1447629fSBarry Smith   MPI_Comm          comm;
418*1447629fSBarry Smith   AO_MemoryScalable *aomems;
419*1447629fSBarry Smith   PetscLayout       map;
420*1447629fSBarry Smith   PetscMPIInt       size,rank;
421*1447629fSBarry Smith 
422*1447629fSBarry Smith   PetscFunctionBegin;
423*1447629fSBarry Smith   /* create special struct aomems */
424*1447629fSBarry Smith   ierr     = PetscNewLog(ao, AO_MemoryScalable, &aomems);CHKERRQ(ierr);
425*1447629fSBarry Smith   ao->data = (void*) aomems;
426*1447629fSBarry Smith   ierr     = PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));CHKERRQ(ierr);
427*1447629fSBarry Smith   ierr     = PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
428*1447629fSBarry Smith 
429*1447629fSBarry Smith   /* transmit all local lengths of isapp to all processors */
430*1447629fSBarry Smith   ierr = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
431*1447629fSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
432*1447629fSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
433*1447629fSBarry Smith   ierr = PetscMalloc2(size,PetscInt,&lens,size,PetscInt,&disp);CHKERRQ(ierr);
434*1447629fSBarry Smith   ierr = ISGetLocalSize(isapp,&napp);CHKERRQ(ierr);
435*1447629fSBarry Smith   ierr = MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);CHKERRQ(ierr);
436*1447629fSBarry Smith 
437*1447629fSBarry Smith   N = 0;
438*1447629fSBarry Smith   for (i = 0; i < size; i++) {
439*1447629fSBarry Smith     disp[i] = N;
440*1447629fSBarry Smith     N      += lens[i];
441*1447629fSBarry Smith   }
442*1447629fSBarry Smith 
443*1447629fSBarry Smith   /* If ispetsc is 0 then use "natural" numbering */
444*1447629fSBarry Smith   if (napp) {
445*1447629fSBarry Smith     if (!ispetsc) {
446*1447629fSBarry Smith       start = disp[rank];
447*1447629fSBarry Smith       ierr  = PetscMalloc((napp+1) * sizeof(PetscInt), &petsc);CHKERRQ(ierr);
448*1447629fSBarry Smith       for (i=0; i<napp; i++) petsc[i] = start + i;
449*1447629fSBarry Smith     } else {
450*1447629fSBarry Smith       ierr  = ISGetIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
451*1447629fSBarry Smith       petsc = (PetscInt*)mypetsc;
452*1447629fSBarry Smith     }
453*1447629fSBarry Smith   }
454*1447629fSBarry Smith 
455*1447629fSBarry Smith   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
456*1447629fSBarry Smith   ierr    = PetscLayoutCreate(comm,&map);CHKERRQ(ierr);
457*1447629fSBarry Smith   map->bs = 1;
458*1447629fSBarry Smith   map->N  = N;
459*1447629fSBarry Smith   ierr    = PetscLayoutSetUp(map);CHKERRQ(ierr);
460*1447629fSBarry Smith 
461*1447629fSBarry Smith   ao->N       = N;
462*1447629fSBarry Smith   ao->n       = map->n;
463*1447629fSBarry Smith   aomems->map = map;
464*1447629fSBarry Smith 
465*1447629fSBarry Smith   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
466*1447629fSBarry Smith   n_local = map->n;
467*1447629fSBarry Smith   ierr    = PetscMalloc2(n_local,PetscInt, &aomems->app_loc,n_local,PetscInt,&aomems->petsc_loc);CHKERRQ(ierr);
468*1447629fSBarry Smith   ierr    = PetscLogObjectMemory(ao,2*n_local*sizeof(PetscInt));CHKERRQ(ierr);
469*1447629fSBarry Smith   ierr    = PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
470*1447629fSBarry Smith   ierr    = PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));CHKERRQ(ierr);
471*1447629fSBarry Smith   ierr    = ISGetIndices(isapp,&myapp);CHKERRQ(ierr);
472*1447629fSBarry Smith 
473*1447629fSBarry Smith   ierr = AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);CHKERRQ(ierr);
474*1447629fSBarry Smith   ierr = AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);CHKERRQ(ierr);
475*1447629fSBarry Smith 
476*1447629fSBarry Smith   ierr = ISRestoreIndices(isapp,&myapp);CHKERRQ(ierr);
477*1447629fSBarry Smith   if (napp) {
478*1447629fSBarry Smith     if (ispetsc) {
479*1447629fSBarry Smith       ierr = ISRestoreIndices(ispetsc,&mypetsc);CHKERRQ(ierr);
480*1447629fSBarry Smith     } else {
481*1447629fSBarry Smith       ierr = PetscFree(petsc);CHKERRQ(ierr);
482*1447629fSBarry Smith     }
483*1447629fSBarry Smith   }
484*1447629fSBarry Smith   ierr = PetscFree2(lens,disp);CHKERRQ(ierr);
485*1447629fSBarry Smith   PetscFunctionReturn(0);
486*1447629fSBarry Smith }
487*1447629fSBarry Smith EXTERN_C_END
488*1447629fSBarry Smith 
489*1447629fSBarry Smith #undef __FUNCT__
490*1447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalable"
491*1447629fSBarry Smith /*@C
492*1447629fSBarry Smith    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.
493*1447629fSBarry Smith 
494*1447629fSBarry Smith    Collective on MPI_Comm
495*1447629fSBarry Smith 
496*1447629fSBarry Smith    Input Parameters:
497*1447629fSBarry Smith +  comm - MPI communicator that is to share AO
498*1447629fSBarry Smith .  napp - size of integer arrays
499*1447629fSBarry Smith .  myapp - integer array that defines an ordering
500*1447629fSBarry Smith -  mypetsc - integer array that defines another ordering (may be NULL to
501*1447629fSBarry Smith              indicate the natural ordering, that is 0,1,2,3,...)
502*1447629fSBarry Smith 
503*1447629fSBarry Smith    Output Parameter:
504*1447629fSBarry Smith .  aoout - the new application ordering
505*1447629fSBarry Smith 
506*1447629fSBarry Smith    Level: beginner
507*1447629fSBarry Smith 
508*1447629fSBarry Smith     Notes: The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
509*1447629fSBarry Smith            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
510*1447629fSBarry Smith            Comparing with AOCreateBasic(), this routine trades memory with message communication.
511*1447629fSBarry Smith 
512*1447629fSBarry Smith .keywords: AO, create
513*1447629fSBarry Smith 
514*1447629fSBarry Smith .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
515*1447629fSBarry Smith @*/
516*1447629fSBarry Smith PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
517*1447629fSBarry Smith {
518*1447629fSBarry Smith   PetscErrorCode ierr;
519*1447629fSBarry Smith   IS             isapp,ispetsc;
520*1447629fSBarry Smith   const PetscInt *app=myapp,*petsc=mypetsc;
521*1447629fSBarry Smith 
522*1447629fSBarry Smith   PetscFunctionBegin;
523*1447629fSBarry Smith   ierr = ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);CHKERRQ(ierr);
524*1447629fSBarry Smith   if (mypetsc) {
525*1447629fSBarry Smith     ierr = ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);CHKERRQ(ierr);
526*1447629fSBarry Smith   } else {
527*1447629fSBarry Smith     ispetsc = NULL;
528*1447629fSBarry Smith   }
529*1447629fSBarry Smith   ierr = AOCreateMemoryScalableIS(isapp,ispetsc,aoout);CHKERRQ(ierr);
530*1447629fSBarry Smith   ierr = ISDestroy(&isapp);CHKERRQ(ierr);
531*1447629fSBarry Smith   if (mypetsc) {
532*1447629fSBarry Smith     ierr = ISDestroy(&ispetsc);CHKERRQ(ierr);
533*1447629fSBarry Smith   }
534*1447629fSBarry Smith   PetscFunctionReturn(0);
535*1447629fSBarry Smith }
536*1447629fSBarry Smith 
537*1447629fSBarry Smith #undef __FUNCT__
538*1447629fSBarry Smith #define __FUNCT__ "AOCreateMemoryScalableIS"
539*1447629fSBarry Smith /*@C
540*1447629fSBarry Smith    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.
541*1447629fSBarry Smith 
542*1447629fSBarry Smith    Collective on IS
543*1447629fSBarry Smith 
544*1447629fSBarry Smith    Input Parameters:
545*1447629fSBarry Smith +  isapp - index set that defines an ordering
546*1447629fSBarry Smith -  ispetsc - index set that defines another ordering (may be NULL to use the
547*1447629fSBarry Smith              natural ordering)
548*1447629fSBarry Smith 
549*1447629fSBarry Smith    Output Parameter:
550*1447629fSBarry Smith .  aoout - the new application ordering
551*1447629fSBarry Smith 
552*1447629fSBarry Smith    Level: beginner
553*1447629fSBarry Smith 
554*1447629fSBarry Smith     Notes: The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
555*1447629fSBarry Smith            that is there cannot be any "holes".
556*1447629fSBarry Smith            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
557*1447629fSBarry Smith .keywords: AO, create
558*1447629fSBarry Smith 
559*1447629fSBarry Smith .seealso: AOCreateMemoryScalable(),  AODestroy()
560*1447629fSBarry Smith @*/
561*1447629fSBarry Smith PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
562*1447629fSBarry Smith {
563*1447629fSBarry Smith   PetscErrorCode ierr;
564*1447629fSBarry Smith   MPI_Comm       comm;
565*1447629fSBarry Smith   AO             ao;
566*1447629fSBarry Smith 
567*1447629fSBarry Smith   PetscFunctionBegin;
568*1447629fSBarry Smith   ierr   = PetscObjectGetComm((PetscObject)isapp,&comm);CHKERRQ(ierr);
569*1447629fSBarry Smith   ierr   = AOCreate(comm,&ao);CHKERRQ(ierr);
570*1447629fSBarry Smith   ierr   = AOSetIS(ao,isapp,ispetsc);CHKERRQ(ierr);
571*1447629fSBarry Smith   ierr   = AOSetType(ao,AOMEMORYSCALABLE);CHKERRQ(ierr);
572*1447629fSBarry Smith   *aoout = ao;
573*1447629fSBarry Smith   PetscFunctionReturn(0);
574*1447629fSBarry Smith }
575